diff --git a/L2L_fineweb-100b_N1.1e08_D1.5e10_C1.0e19/README.md b/L2L_fineweb-100b_N1.1e08_D1.5e10_C1.0e19/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_fineweb-100b_N1.1e08_D1.5e10_C1.0e19/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_fineweb-100b_N1.1e08_D1.5e10_C1.0e19/config.json b/L2L_fineweb-100b_N1.1e08_D1.5e10_C1.0e19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..41baa877f3632c674fd6e77965ab3ee5a2a819e1 --- /dev/null +++ b/L2L_fineweb-100b_N1.1e08_D1.5e10_C1.0e19/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 704, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 11, + "n_kv_heads": null, + "n_layers": 11, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_fineweb-100b_N1.1e08_D6.6e08_C4.4e17/model.safetensors b/L2L_fineweb-100b_N1.1e08_D6.6e08_C4.4e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49ff13f2829c28f6fc261eba21a1ef2258fd0fa9 --- /dev/null +++ b/L2L_fineweb-100b_N1.1e08_D6.6e08_C4.4e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a083af9fbca68fdf008655c6f2be7b60be6fb355c62644eb9bdc412eaf5c466 +size 442045664 diff --git a/L2L_fineweb-100b_N1.3e08_D1.2e09_C9.6e17/model.safetensors b/L2L_fineweb-100b_N1.3e08_D1.2e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28b170a9e762fcbdf75058601fa705c4cc32c132 --- /dev/null +++ b/L2L_fineweb-100b_N1.3e08_D1.2e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97d4cdb278ec9737499d78eb7ccdb10e980885361366ddd3be0d9e4db03bc70 +size 536507728 diff --git a/L2L_fineweb-100b_N1.3e08_D1.2e10_C1.0e19/model.safetensors b/L2L_fineweb-100b_N1.3e08_D1.2e10_C1.0e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b0332076bf18d70c202292fae40005aaa0a4359 --- /dev/null +++ b/L2L_fineweb-100b_N1.3e08_D1.2e10_C1.0e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c0b3bc701dc772378d30faa30e13902b9c2422880ddb5ef64c673b97356c1e +size 536507728 diff --git a/L2L_fineweb-100b_N2.0e07_D1.7e09_C2.0e17/model.safetensors b/L2L_fineweb-100b_N2.0e07_D1.7e09_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46f70b4b0f507e9321f9b5b43cfab3c3a4a0657e --- /dev/null +++ b/L2L_fineweb-100b_N2.0e07_D1.7e09_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3edc10e17b912ac0023608ad6e42d31e485ff0d328047b3b437fd0e7009295d +size 78139936 diff --git a/L2L_fineweb-100b_N2.3e08_D3.5e10_C4.8e19/model.safetensors b/L2L_fineweb-100b_N2.3e08_D3.5e10_C4.8e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c35f9ecb306613f4761cc0d56e26bac51459a003 --- /dev/null +++ b/L2L_fineweb-100b_N2.3e08_D3.5e10_C4.8e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1295ad3fe74e3918a8fbb9ced691b0e19d2c33f503f5c510cdf7f0d9dd808a8a +size 909559448 diff --git a/L2L_fineweb-100b_N2.7e08_D2.9e09_C4.6e18/README.md b/L2L_fineweb-100b_N2.7e08_D2.9e09_C4.6e18/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_fineweb-100b_N2.7e08_D2.9e09_C4.6e18/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_fineweb-100b_N2.7e08_D2.9e09_C4.6e18/config.json b/L2L_fineweb-100b_N2.7e08_D2.9e09_C4.6e18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..13cb78433c2752f971a3e07be8753b97f7077d57 --- /dev/null +++ b/L2L_fineweb-100b_N2.7e08_D2.9e09_C4.6e18/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 1024, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 16, + "n_kv_heads": null, + "n_layers": 16, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_fineweb-100b_N3.5e07_D2.1e09_C4.4e17/model.safetensors b/L2L_fineweb-100b_N3.5e07_D2.1e09_C4.4e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74e8f2329ab7f02e166cf4aa8f297fb59d4f98d0 --- /dev/null +++ b/L2L_fineweb-100b_N3.5e07_D2.1e09_C4.4e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc98d7283b467efd53e18f26074d655533e56622cefd66884924f9cf4b6d031 +size 140815064 diff --git a/L2L_fineweb-100b_N3.5e07_D9.5e08_C2.0e17/model.safetensors b/L2L_fineweb-100b_N3.5e07_D9.5e08_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78aae1812ba857a1d282026844fba8d8f1b838b6 --- /dev/null +++ b/L2L_fineweb-100b_N3.5e07_D9.5e08_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bedbd3355b1581e0b8b32d8976ead9e8c12a0c990b1057410c3c4add82a869ee +size 140815064 diff --git a/L2L_fineweb-100b_N3.6e08_D4.6e09_C1.0e19/model.safetensors b/L2L_fineweb-100b_N3.6e08_D4.6e09_C1.0e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad45542870c99ccffe5f3a4db06bf954377d09b7 --- /dev/null +++ b/L2L_fineweb-100b_N3.6e08_D4.6e09_C1.0e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f29ed77aedea6e170821127c6345defb9b3c50d938b876ffb89dbadbd0dd7f7 +size 1441882272 diff --git a/L2L_fineweb-100b_N4.2e08_D8.8e09_C2.2e19/model.safetensors b/L2L_fineweb-100b_N4.2e08_D8.8e09_C2.2e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42163023390a4c9bf6c824b53daaddf3eee69ed5 --- /dev/null +++ b/L2L_fineweb-100b_N4.2e08_D8.8e09_C2.2e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87398fb61e13ac2d56ed7599494851dec76be44808fda390553d8eca870dcbb6 +size 1660221752 diff --git a/L2L_fineweb-100b_N4.6e07_D3.5e09_C9.6e17/model.safetensors b/L2L_fineweb-100b_N4.6e07_D3.5e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c943634e14e86ebffe2444de13231d3d5ad7379 --- /dev/null +++ b/L2L_fineweb-100b_N4.6e07_D3.5e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9d7ec5af25505d755d26fe40562c9a800e338f577c7966a6eccbfebd92086d +size 182182704 diff --git a/L2L_fineweb-100b_N4.6e07_D7.7e09_C2.1e18/model.safetensors b/L2L_fineweb-100b_N4.6e07_D7.7e09_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93c2a5667669fd7850ac6af7711b0b07cf8e1604 --- /dev/null +++ b/L2L_fineweb-100b_N4.6e07_D7.7e09_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f956c29927e288e899fec97a25e2ab30f4d651003f34f399504d8c81376986 +size 182182704 diff --git a/L2L_fineweb-100b_N5.8e07_D2.8e09_C9.6e17/model.safetensors b/L2L_fineweb-100b_N5.8e07_D2.8e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..acad3a4c4e73c4326789ff1f8371b1ec8dc278f4 --- /dev/null +++ b/L2L_fineweb-100b_N5.8e07_D2.8e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634c3cfdde46da0f18ab94fab128e740575f234415e367ff89f57cdcd4068b22 +size 231809928 diff --git a/L2L_fineweb-100b_N5.8e07_D6.0e09_C2.1e18/model.safetensors b/L2L_fineweb-100b_N5.8e07_D6.0e09_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccda9dabbe0fc19d6f1948ad419802f3104ea724 --- /dev/null +++ b/L2L_fineweb-100b_N5.8e07_D6.0e09_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:280d3a415ab1f97935a2ebb392163ded1f6fed577321c698f0a57c731584b4c4 +size 231809928 diff --git a/L2L_fineweb-100b_N7.3e07_D1.0e09_C4.4e17/model.safetensors b/L2L_fineweb-100b_N7.3e07_D1.0e09_C4.4e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b03165fcf833b9eb67bfeeb2ebd2c77a0fe5b2c4 --- /dev/null +++ b/L2L_fineweb-100b_N7.3e07_D1.0e09_C4.4e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7efc8eaef857ee25861f05e473b43c8607ba9ac4b45a9920fb8b2c7601e85195 +size 290876416 diff --git a/L2L_fineweb-edu-100b_N1.1e08_D3.2e09_C2.1e18/model.safetensors b/L2L_fineweb-edu-100b_N1.1e08_D3.2e09_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5da8b0309dafe77efb67672fbab29a796af352b4 --- /dev/null +++ b/L2L_fineweb-edu-100b_N1.1e08_D3.2e09_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4a988bc760ff3d6c3d692f7c21bfcfc0b30797a56f7fa86a4cba0ad5bef107 +size 442045664 diff --git a/L2L_fineweb-edu-100b_N1.1e08_D6.6e08_C4.4e17/model.safetensors b/L2L_fineweb-edu-100b_N1.1e08_D6.6e08_C4.4e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..631cf7186f364af5dd7bb5f1787d3536d5356453 --- /dev/null +++ b/L2L_fineweb-edu-100b_N1.1e08_D6.6e08_C4.4e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe5cfc1322d19a109a365915086a36a816c435bb74ab63026307c8e0fc114db0 +size 442045664 diff --git a/L2L_fineweb-edu-100b_N1.2e09_D3.1e09_C2.2e19/README.md b/L2L_fineweb-edu-100b_N1.2e09_D3.1e09_C2.2e19/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_fineweb-edu-100b_N1.2e09_D3.1e09_C2.2e19/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_fineweb-edu-100b_N1.2e09_D3.1e09_C2.2e19/config.json b/L2L_fineweb-edu-100b_N1.2e09_D3.1e09_C2.2e19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4bdcd191680e717f3ce8224f0b2d84783fa6685d --- /dev/null +++ b/L2L_fineweb-edu-100b_N1.2e09_D3.1e09_C2.2e19/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 1792, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 28, + "n_kv_heads": null, + "n_layers": 28, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_fineweb-edu-100b_N2.0e07_D1.7e09_C2.0e17/model.safetensors b/L2L_fineweb-edu-100b_N2.0e07_D1.7e09_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bbabcf0de52c982e40a14c9f99c946da6b1869d --- /dev/null +++ b/L2L_fineweb-edu-100b_N2.0e07_D1.7e09_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7220c66488fa59edc3f0e887472dd5f52ba37743ae4a4cbef5c1b629454d5958 +size 78139936 diff --git a/L2L_fineweb-edu-100b_N2.3e08_D7.3e09_C1.0e19/README.md b/L2L_fineweb-edu-100b_N2.3e08_D7.3e09_C1.0e19/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_fineweb-edu-100b_N2.3e08_D7.3e09_C1.0e19/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_fineweb-edu-100b_N2.3e08_D7.3e09_C1.0e19/config.json b/L2L_fineweb-edu-100b_N2.3e08_D7.3e09_C1.0e19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..75ecc120927724265e2dea013d0855357144913b --- /dev/null +++ b/L2L_fineweb-edu-100b_N2.3e08_D7.3e09_C1.0e19/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 960, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 15, + "n_kv_heads": null, + "n_layers": 15, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_fineweb-edu-100b_N3.1e08_D2.4e09_C4.6e18/model.safetensors b/L2L_fineweb-edu-100b_N3.1e08_D2.4e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62828be836574036ab9461b3f73178edce6b5982 --- /dev/null +++ b/L2L_fineweb-edu-100b_N3.1e08_D2.4e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:549b1f9cf3d1a1eb46e1c279960dcb66d72e9715dc8940db032d4828d08f90ff +size 1244778504 diff --git a/L2L_fineweb-edu-100b_N3.5e07_D2.1e09_C4.4e17/model.safetensors b/L2L_fineweb-edu-100b_N3.5e07_D2.1e09_C4.4e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d38465c4f2b16f79b833ba53c0c3fcb2363dfc24 --- /dev/null +++ b/L2L_fineweb-edu-100b_N3.5e07_D2.1e09_C4.4e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8460ee87fee16bd691c50e5ff73cbff58c054b504142d270a22eacd82602adb4 +size 140815064 diff --git a/L2L_fineweb-edu-100b_N3.6e08_D4.6e09_C1.0e19/README.md b/L2L_fineweb-edu-100b_N3.6e08_D4.6e09_C1.0e19/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_fineweb-edu-100b_N3.6e08_D4.6e09_C1.0e19/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_fineweb-edu-100b_N3.6e08_D4.6e09_C1.0e19/config.json b/L2L_fineweb-edu-100b_N3.6e08_D4.6e09_C1.0e19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..21ae7419e0c137b5fa021ce7dddedae2872e4ffd --- /dev/null +++ b/L2L_fineweb-edu-100b_N3.6e08_D4.6e09_C1.0e19/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 1152, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 18, + "n_kv_heads": null, + "n_layers": 18, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_fineweb-edu-100b_N4.6e07_D1.6e09_C4.4e17/README.md b/L2L_fineweb-edu-100b_N4.6e07_D1.6e09_C4.4e17/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_fineweb-edu-100b_N4.6e07_D1.6e09_C4.4e17/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_fineweb-edu-100b_N4.6e07_D1.6e09_C4.4e17/config.json b/L2L_fineweb-edu-100b_N4.6e07_D1.6e09_C4.4e17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6ddcaca5a561d50cfa15ad13fe288fd743cf7562 --- /dev/null +++ b/L2L_fineweb-edu-100b_N4.6e07_D1.6e09_C4.4e17/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 448, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 7, + "n_kv_heads": null, + "n_layers": 7, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_fineweb-edu-100b_N4.8e08_D1.6e09_C4.6e18/model.safetensors b/L2L_fineweb-edu-100b_N4.8e08_D1.6e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bef0fc961f7018c33a63e928df36dd1a46aecbe4 --- /dev/null +++ b/L2L_fineweb-edu-100b_N4.8e08_D1.6e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267c1b6222161e79e074d164ceca4f47119f015fdc1ba5fd3580ca080906bcf8 +size 1900976584 diff --git a/L2L_fineweb-edu-100b_N5.4e08_D6.8e09_C2.2e19/model.safetensors b/L2L_fineweb-edu-100b_N5.4e08_D6.8e09_C2.2e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6fc3b362ea81518f7198b6a244093b4a4bbf73e2 --- /dev/null +++ b/L2L_fineweb-edu-100b_N5.4e08_D6.8e09_C2.2e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a0aa3c1a01080623917027fb862f56b917dc8a9e75130c74bdea8d47faee27 +size 2165326432 diff --git a/L2L_fineweb-edu-100b_N5.8e07_D2.8e09_C9.6e17/model.safetensors b/L2L_fineweb-edu-100b_N5.8e07_D2.8e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c172655be599fa8ffcc27d7fdddc16a9e9b416f0 --- /dev/null +++ b/L2L_fineweb-edu-100b_N5.8e07_D2.8e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42d33b285d3f98c86dd53e9aa332e5a76499999b8ffff38a0f2e29b3c456b08 +size 231809928 diff --git a/L2L_fineweb-edu-100b_N9.0e07_D1.8e09_C9.6e17/model.safetensors b/L2L_fineweb-edu-100b_N9.0e07_D1.8e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c237d2ab71a4ed898d839292095cce2726e52eb9 --- /dev/null +++ b/L2L_fineweb-edu-100b_N9.0e07_D1.8e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b0716dcffb47d06ea19cd6019d690d362f784e041b078a49df764ad97a390b +size 360561776 diff --git a/L2L_fineweb-edu-100b_N9.0e07_D3.7e08_C2.0e17/model.safetensors b/L2L_fineweb-edu-100b_N9.0e07_D3.7e08_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77ed1407c5f7a7f91e8e846bc0b8bb63ecb2e630 --- /dev/null +++ b/L2L_fineweb-edu-100b_N9.0e07_D3.7e08_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:363950634745aaedf3d4ffd42ddccf42fcab3710f7a1fcd81f4776da287fdbff +size 360561776 diff --git a/L2L_fineweb-edu-100b_N9.0e07_D8.1e08_C4.4e17/model.safetensors b/L2L_fineweb-edu-100b_N9.0e07_D8.1e08_C4.4e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0950dfe47559a299c106d279b76086ff69ab05e --- /dev/null +++ b/L2L_fineweb-edu-100b_N9.0e07_D8.1e08_C4.4e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5fefaa58a5a2228997c2785c91dc32828bcb44ce9b9f9c145a7ca5d2e272aaa +size 360561776 diff --git a/L2L_fineweb-edu-100b_N9.0e07_D8.5e09_C4.6e18/model.safetensors b/L2L_fineweb-edu-100b_N9.0e07_D8.5e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..876fb0a75f8c13cc1038ea224ab661968501a673 --- /dev/null +++ b/L2L_fineweb-edu-100b_N9.0e07_D8.5e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78eebeb0a0cac3224246dd0ea2ff4b122a779b1b1c04c0856e2c310a864d512c +size 360561776 diff --git a/L2L_proof-pile-2_N1.1e08_D6.6e08_C4.4e17/README.md b/L2L_proof-pile-2_N1.1e08_D6.6e08_C4.4e17/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_proof-pile-2_N1.1e08_D6.6e08_C4.4e17/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_proof-pile-2_N1.1e08_D6.6e08_C4.4e17/config.json b/L2L_proof-pile-2_N1.1e08_D6.6e08_C4.4e17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..41baa877f3632c674fd6e77965ab3ee5a2a819e1 --- /dev/null +++ b/L2L_proof-pile-2_N1.1e08_D6.6e08_C4.4e17/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 704, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 11, + "n_kv_heads": null, + "n_layers": 11, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_proof-pile-2_N1.1e08_D6.9e09_C4.6e18/model.safetensors b/L2L_proof-pile-2_N1.1e08_D6.9e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..744b6d5627a372af4ad3833e9e4266ab4744d82b --- /dev/null +++ b/L2L_proof-pile-2_N1.1e08_D6.9e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c260c4cd8396de7700df68e82afa1a03391f863d01006de53c439e43d2b3a72 +size 442045664 diff --git a/L2L_proof-pile-2_N1.6e08_D4.5e08_C4.4e17/README.md b/L2L_proof-pile-2_N1.6e08_D4.5e08_C4.4e17/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_proof-pile-2_N1.6e08_D4.5e08_C4.4e17/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_proof-pile-2_N1.6e08_D4.5e08_C4.4e17/config.json b/L2L_proof-pile-2_N1.6e08_D4.5e08_C4.4e17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eefab0956111af04f5cf1d2f3741df6009e664c0 --- /dev/null +++ b/L2L_proof-pile-2_N1.6e08_D4.5e08_C4.4e17/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 832, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 13, + "n_kv_heads": null, + "n_layers": 13, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_proof-pile-2_N1.9e08_D4.0e09_C4.6e18/model.safetensors b/L2L_proof-pile-2_N1.9e08_D4.0e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37e71c0850b000609d798f5bdb7a5ef7fe88cb5d --- /dev/null +++ b/L2L_proof-pile-2_N1.9e08_D4.0e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7106a6f7dd21c88c3a40605d122ab604d7ab3b053659ab0e5b6bcbb7ce423988 +size 769084968 diff --git a/L2L_proof-pile-2_N2.0e07_D1.7e09_C2.0e17/model.safetensors b/L2L_proof-pile-2_N2.0e07_D1.7e09_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a4b30de14b5bfe08bc9c14ed75a0130ee36b89b --- /dev/null +++ b/L2L_proof-pile-2_N2.0e07_D1.7e09_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3eb8af1d3e802889d140d0aa83404e2bd523a7f4080f0ab560cfe9bf248a77 +size 78139936 diff --git a/L2L_proof-pile-2_N2.3e08_D7.0e08_C9.6e17/model.safetensors b/L2L_proof-pile-2_N2.3e08_D7.0e08_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..167d82c16f02870d55029ea71c6b5dc5b12f07d9 --- /dev/null +++ b/L2L_proof-pile-2_N2.3e08_D7.0e08_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd145f1ce6470abdd016d07e54f6cf96da15608f7741dfb42d691cfd393f3774 +size 909559448 diff --git a/L2L_proof-pile-2_N2.7e08_D3.0e10_C4.8e19/model.safetensors b/L2L_proof-pile-2_N2.7e08_D3.0e10_C4.8e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da458a0a06d785db64e48df3e4d4e9ad2ac29315 --- /dev/null +++ b/L2L_proof-pile-2_N2.7e08_D3.0e10_C4.8e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b72345487d4ca9647f7702338d24845ab66949b8f77d0d9c15e26845388266 +size 1067730840 diff --git a/L2L_proof-pile-2_N2.7e08_D6.2e09_C1.0e19/model.safetensors b/L2L_proof-pile-2_N2.7e08_D6.2e09_C1.0e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6491538fa0ba1b1b4366054bc2fa83c8cc58d9c3 --- /dev/null +++ b/L2L_proof-pile-2_N2.7e08_D6.2e09_C1.0e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48bd8d3e7f7586a34a1560d6fe078f53d38e6c74b9ba67e1f9570fe887441358 +size 1067730840 diff --git a/L2L_proof-pile-2_N3.5e07_D2.1e09_C4.4e17/model.safetensors b/L2L_proof-pile-2_N3.5e07_D2.1e09_C4.4e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59202ee83f97eb75be49da4a03ef910a74441b74 --- /dev/null +++ b/L2L_proof-pile-2_N3.5e07_D2.1e09_C4.4e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90778d1958fd68b93d81c0d837155feda3c4de8c0f89e807fbb0b634ae29ee20 +size 140815064 diff --git a/L2L_proof-pile-2_N3.5e07_D4.5e09_C9.6e17/model.safetensors b/L2L_proof-pile-2_N3.5e07_D4.5e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de7a132a0d670065d4d19c3f043c0cafdefbd9d3 --- /dev/null +++ b/L2L_proof-pile-2_N3.5e07_D4.5e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c0953b5cd3a1f2bd3dde88b1bc05e4f9bd852eb6e9c377a7c16dd325caeddc +size 140815064 diff --git a/L2L_proof-pile-2_N3.5e07_D9.5e08_C2.0e17/model.safetensors b/L2L_proof-pile-2_N3.5e07_D9.5e08_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42d636f6d47ee7a1c3c6bafa747f9411cc012b98 --- /dev/null +++ b/L2L_proof-pile-2_N3.5e07_D9.5e08_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6221ec86b23c6fa707b9ce0cc979849c06e79fbdf6f4f531a2cda286b12d647 +size 140815064 diff --git a/L2L_proof-pile-2_N3.6e08_D2.1e09_C4.6e18/model.safetensors b/L2L_proof-pile-2_N3.6e08_D2.1e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a65775ae7a78313c298b9434c3e642d489dd5c9 --- /dev/null +++ b/L2L_proof-pile-2_N3.6e08_D2.1e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936e0fe6c9f4fc35b9df8dbd98bf7b5f2a12d8f86f9846f8cd12328cc1462415 +size 1441882272 diff --git a/L2L_proof-pile-2_N4.2e08_D8.4e08_C2.1e18/model.safetensors b/L2L_proof-pile-2_N4.2e08_D8.4e08_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e9e25822198b56a03920a050e4ff4f03de0d0a2 --- /dev/null +++ b/L2L_proof-pile-2_N4.2e08_D8.4e08_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd7528668486897fa430c2c88cafed0a7a9421ad83b9254f95e23189f47a53f +size 1660221752 diff --git a/L2L_proof-pile-2_N7.8e08_D4.7e09_C2.2e19/model.safetensors b/L2L_proof-pile-2_N7.8e08_D4.7e09_C2.2e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a31604682620175a98afb30d8515fb2fff370986 --- /dev/null +++ b/L2L_proof-pile-2_N7.8e08_D4.7e09_C2.2e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc08087233383eb48be0ba47b4163a73d74c8b9051e6f0bda4d042e2dff2e056 +size 3111742472 diff --git a/L2L_slimpajama-chunk1_N1.1e08_D3.2e09_C2.1e18/model.safetensors b/L2L_slimpajama-chunk1_N1.1e08_D3.2e09_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfc64476782cf75dae104521130197e4f7b2fee4 --- /dev/null +++ b/L2L_slimpajama-chunk1_N1.1e08_D3.2e09_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae9af314a983da9cf070555ac0502824ff7a6a9d783c14da937374bcdf4ca48f +size 442045664 diff --git a/L2L_slimpajama-chunk1_N1.3e08_D5.7e09_C4.6e18/model.safetensors b/L2L_slimpajama-chunk1_N1.3e08_D5.7e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bf3f13b1e7682d1eee7e909279d44cbcfe1c7d2 --- /dev/null +++ b/L2L_slimpajama-chunk1_N1.3e08_D5.7e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af295e1be1831a95a4d9d9ef0936288fd6cffac0fa219dce9b98edf281e6907 +size 536507728 diff --git a/L2L_slimpajama-chunk1_N1.9e08_D8.3e08_C9.6e17/model.safetensors b/L2L_slimpajama-chunk1_N1.9e08_D8.3e08_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b5b8723490d92c23651be61f9108305353ca854 --- /dev/null +++ b/L2L_slimpajama-chunk1_N1.9e08_D8.3e08_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3680276d7bd769b897b5730617468b807041eba3e56f70827555a054da16b39 +size 769084968 diff --git a/L2L_slimpajama-chunk1_N2.7e08_D3.0e10_C4.8e19/README.md b/L2L_slimpajama-chunk1_N2.7e08_D3.0e10_C4.8e19/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_slimpajama-chunk1_N2.7e08_D3.0e10_C4.8e19/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_slimpajama-chunk1_N2.7e08_D3.0e10_C4.8e19/config.json b/L2L_slimpajama-chunk1_N2.7e08_D3.0e10_C4.8e19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..13cb78433c2752f971a3e07be8753b97f7077d57 --- /dev/null +++ b/L2L_slimpajama-chunk1_N2.7e08_D3.0e10_C4.8e19/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 1024, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 16, + "n_kv_heads": null, + "n_layers": 16, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_slimpajama-chunk1_N2.7e08_D6.0e08_C9.6e17/README.md b/L2L_slimpajama-chunk1_N2.7e08_D6.0e08_C9.6e17/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23bb8239b30ad636a1b592db0346c4753dc5325d --- /dev/null +++ b/L2L_slimpajama-chunk1_N2.7e08_D6.0e08_C9.6e17/README.md @@ -0,0 +1,9 @@ +--- +tags: +- model_hub_mixin +- pytorch_model_hub_mixin +--- + +This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration: +- Library: [More Information Needed] +- Docs: [More Information Needed] \ No newline at end of file diff --git a/L2L_slimpajama-chunk1_N2.7e08_D6.0e08_C9.6e17/config.json b/L2L_slimpajama-chunk1_N2.7e08_D6.0e08_C9.6e17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..13cb78433c2752f971a3e07be8753b97f7077d57 --- /dev/null +++ b/L2L_slimpajama-chunk1_N2.7e08_D6.0e08_C9.6e17/config.json @@ -0,0 +1,39 @@ +{ + "activation_type": "gelu", + "alibi": false, + "alibi_bias_max": 8.0, + "attention_dropout": 0.0, + "attention_layer_norm": true, + "attention_layer_norm_with_affine": true, + "bias_for_layer_norm": false, + "block_group_size": 1, + "block_type": "sequential", + "clip_qkv": null, + "d_model": 1024, + "embedding_dropout": 0.0, + "embedding_size": 32000, + "eos_token_id": 2, + "flash_attention": false, + "include_bias": false, + "init_cutoff_factor": null, + "init_device": "cpu", + "init_fn": "mitchell", + "init_std": 0.02, + "layer_norm_type": "default", + "layer_norm_with_affine": true, + "max_sequence_length": 512, + "mlp_hidden_size": null, + "mlp_ratio": 4, + "multi_query_attention": false, + "n_heads": 16, + "n_kv_heads": null, + "n_layers": 16, + "pad_token_id": 2, + "precision": "amp_bf16", + "residual_dropout": 0.0, + "rope": true, + "rope_full_precision": true, + "scale_logits": false, + "vocab_size": 32000, + "weight_tying": false +} \ No newline at end of file diff --git a/L2L_slimpajama-chunk1_N4.6e07_D7.7e09_C2.1e18/model.safetensors b/L2L_slimpajama-chunk1_N4.6e07_D7.7e09_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..261893835cc9ceb5023ff20ed0a40eb582b90e8a --- /dev/null +++ b/L2L_slimpajama-chunk1_N4.6e07_D7.7e09_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc40d526285b77851be66bb75a3b4720916f311da8a4bc01a783bc7dd338308 +size 182182704 diff --git a/L2L_slimpajama-chunk1_N4.8e08_D1.6e09_C4.6e18/model.safetensors b/L2L_slimpajama-chunk1_N4.8e08_D1.6e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f94ae691bcdc2ab3b662c3e99bf03a00617f1fa8 --- /dev/null +++ b/L2L_slimpajama-chunk1_N4.8e08_D1.6e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4e3713b7d8870266b8bf34edbb623c121339f60669202653313716bd72380d +size 1900976584 diff --git a/L2L_slimpajama-chunk1_N6.1e08_D6.0e09_C2.2e19/model.safetensors b/L2L_slimpajama-chunk1_N6.1e08_D6.0e09_C2.2e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03cf7e9f0c853dcec7e282cc7df778999aaeecfb --- /dev/null +++ b/L2L_slimpajama-chunk1_N6.1e08_D6.0e09_C2.2e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd4e22283860064370ff6b0516348cc9c8d4a4975c25a8366842b6ae1ad36eb +size 2454450920 diff --git a/L2L_slimpajama-chunk1_N9.0e07_D3.7e08_C2.0e17/model.safetensors b/L2L_slimpajama-chunk1_N9.0e07_D3.7e08_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..614a5205a63eb5c3c5419a189d9d89fd471a4da7 --- /dev/null +++ b/L2L_slimpajama-chunk1_N9.0e07_D3.7e08_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cda00c016e7cbb1a8f9a6a368f822f876884b778bc443aa4964603cb49c4e4f +size 360561776 diff --git a/L2L_smollm-corpus_N1.1e08_D3.2e09_C2.1e18/model.safetensors b/L2L_smollm-corpus_N1.1e08_D3.2e09_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f8dbf147eb8bc42cd26f11e9aea1f7e064fe63c --- /dev/null +++ b/L2L_smollm-corpus_N1.1e08_D3.2e09_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cfa2ccf68f204463314861e97873f96b0b08edf66918eb0691b7f072fad8503 +size 442045664 diff --git a/L2L_smollm-corpus_N2.0e07_D1.7e09_C2.0e17/model.safetensors b/L2L_smollm-corpus_N2.0e07_D1.7e09_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea9332c0f2a6fe46c99e7d39968fe022634911e1 --- /dev/null +++ b/L2L_smollm-corpus_N2.0e07_D1.7e09_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08952020f590f58f88ddba109f2c7e3f67a8f0bd550c57cc5bd5f9f0b0409e72 +size 78139936 diff --git a/L2L_smollm-corpus_N2.3e08_D3.5e10_C4.8e19/model.safetensors b/L2L_smollm-corpus_N2.3e08_D3.5e10_C4.8e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74f6fe8e456f762ac4735af26848aca28cff4f85 --- /dev/null +++ b/L2L_smollm-corpus_N2.3e08_D3.5e10_C4.8e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e877fca6bb87d836f2ac5cccc6425dbc80f678bafb3ff8ca0e118d6e6baed7 +size 909559448 diff --git a/L2L_smollm-corpus_N2.3e08_D7.0e08_C9.6e17/model.safetensors b/L2L_smollm-corpus_N2.3e08_D7.0e08_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6830c19308998e4890fcf3e103e25512b22d01b2 --- /dev/null +++ b/L2L_smollm-corpus_N2.3e08_D7.0e08_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac05df9a4a343b4427764ec5039cf9543e880769f9f28e5c6153b9d704af09e +size 909559448 diff --git a/L2L_smollm-corpus_N2.7e08_D1.4e10_C2.2e19/model.safetensors b/L2L_smollm-corpus_N2.7e08_D1.4e10_C2.2e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c45a4419c608042933672d4977dc15ec489a4d4c --- /dev/null +++ b/L2L_smollm-corpus_N2.7e08_D1.4e10_C2.2e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4458c0f451396d6d45b435f4309fb3c326563193f79b948823be1e05b414d37 +size 1067730840 diff --git a/L2L_smollm-corpus_N3.6e08_D9.7e08_C2.1e18/model.safetensors b/L2L_smollm-corpus_N3.6e08_D9.7e08_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39187fb491e939bd8915fc2737fc709fbcae0ffd --- /dev/null +++ b/L2L_smollm-corpus_N3.6e08_D9.7e08_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0771b5b7d71fcb7c85712c280f2d4d57a4458d449cbc2c967ae6137010cced43 +size 1441882272 diff --git a/L2L_smollm-corpus_N4.2e08_D4.0e09_C1.0e19/model.safetensors b/L2L_smollm-corpus_N4.2e08_D4.0e09_C1.0e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..934a9d7d5de15eedfd87aadec4af7d4f51b61246 --- /dev/null +++ b/L2L_smollm-corpus_N4.2e08_D4.0e09_C1.0e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b5f968e75047eda29eb234841525481ab99057f794528bb7d6fbc4d397c337 +size 1660221752 diff --git a/L2L_smollm-corpus_N4.2e08_D8.4e08_C2.1e18/model.safetensors b/L2L_smollm-corpus_N4.2e08_D8.4e08_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d324499c21b4a433cb22c1ff4d5d7b2bf358a26e --- /dev/null +++ b/L2L_smollm-corpus_N4.2e08_D8.4e08_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af90001e698d40c9163f354a9b4e70aad44a3907d4d1c9c00f7c426ca2955f01 +size 1660221752 diff --git a/L2L_smollm-corpus_N4.6e07_D3.5e09_C9.6e17/model.safetensors b/L2L_smollm-corpus_N4.6e07_D3.5e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa27b56369cccbf334c328bfeb53f93609b17315 --- /dev/null +++ b/L2L_smollm-corpus_N4.6e07_D3.5e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7378c78d5924cb73f7a46e231f018d01952060b8956bf1f3672dad190d08708e +size 182182704 diff --git a/L2L_smollm-corpus_N4.8e08_D1.7e10_C4.8e19/model.safetensors b/L2L_smollm-corpus_N4.8e08_D1.7e10_C4.8e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85b58989a7936124f2bf1a7faa761a53f5b26d67 --- /dev/null +++ b/L2L_smollm-corpus_N4.8e08_D1.7e10_C4.8e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc490e8a12bd3f469ce312fe2923f41a1da129587f369e43d260529a662890fa +size 1900976584 diff --git a/L2L_smollm-corpus_N4.8e08_D7.7e09_C2.2e19/model.safetensors b/L2L_smollm-corpus_N4.8e08_D7.7e09_C2.2e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffa04701e85b339fca094e6c60aa2a784702169d --- /dev/null +++ b/L2L_smollm-corpus_N4.8e08_D7.7e09_C2.2e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8880e40e9a9eb0634a1b0d57c6dc744ab2d3042f499d3d0a8e6609e77d3ddb +size 1900976584 diff --git a/L2L_smollm-corpus_N5.8e07_D1.3e09_C4.4e17/model.safetensors b/L2L_smollm-corpus_N5.8e07_D1.3e09_C4.4e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bcbca9c9b8f788b10a578d8b38493ad5cefdb4dd --- /dev/null +++ b/L2L_smollm-corpus_N5.8e07_D1.3e09_C4.4e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e02e6b86aaa41c619a9dca5a8ba24989b613ee18ad452a0b9c9777af5108402 +size 231809928 diff --git a/L2L_smollm-corpus_N7.8e08_D2.1e09_C1.0e19/model.safetensors b/L2L_smollm-corpus_N7.8e08_D2.1e09_C1.0e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42da2de891e5cc73e346768a121e838562084aab --- /dev/null +++ b/L2L_smollm-corpus_N7.8e08_D2.1e09_C1.0e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36826452430121f50c30eaab0fb0304b8bd0d4f16027b4844e307f2ce1006109 +size 3111742472 diff --git a/L2L_smollm-corpus_N9.0e07_D3.9e09_C2.1e18/model.safetensors b/L2L_smollm-corpus_N9.0e07_D3.9e09_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6dc108c8d44960fc6c2739641f7beec088bbc69c --- /dev/null +++ b/L2L_smollm-corpus_N9.0e07_D3.9e09_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cf363abd849ffa828f5c5f8b3567fb6bd7bb25910a2da32314116810eac2320 +size 360561776 diff --git a/L2L_starcoder_N1.1e08_D1.4e09_C9.6e17/model.safetensors b/L2L_starcoder_N1.1e08_D1.4e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca83624af6f324d12a280e1120f7290dc0c206a2 --- /dev/null +++ b/L2L_starcoder_N1.1e08_D1.4e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398278c601b4bdb55b630bc3a4287d590660b56003a76872489371ccaaeab48b +size 442045664 diff --git a/L2L_starcoder_N1.3e08_D1.2e10_C1.0e19/model.safetensors b/L2L_starcoder_N1.3e08_D1.2e10_C1.0e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62779b66b3dd5090bf719a2e9010b1916cb09433 --- /dev/null +++ b/L2L_starcoder_N1.3e08_D1.2e10_C1.0e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:173df202011bcc605f6cac38ff2bf1a5c6bc91111c894ea1008471423502cff0 +size 536507728 diff --git a/L2L_starcoder_N1.3e08_D5.7e09_C4.6e18/model.safetensors b/L2L_starcoder_N1.3e08_D5.7e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fca16b29174443cb49d81cfbf844c5d4208bf6f --- /dev/null +++ b/L2L_starcoder_N1.3e08_D5.7e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac270ec9f74aa34a20a23d2d382afbeb07ba9872ffda2f34cb95c02e5cb4251 +size 536507728 diff --git a/L2L_starcoder_N2.3e08_D7.3e09_C1.0e19/model.safetensors b/L2L_starcoder_N2.3e08_D7.3e09_C1.0e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71ca4975c23304a5c0e2d9eeaa5a00fa81ece300 --- /dev/null +++ b/L2L_starcoder_N2.3e08_D7.3e09_C1.0e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534236ec13b667341454f89849c2a816b85442ad0b7aae91f66de261564965bc +size 909559448 diff --git a/L2L_starcoder_N3.1e08_D2.6e10_C4.8e19/model.safetensors b/L2L_starcoder_N3.1e08_D2.6e10_C4.8e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bf38fce59cfb7d2615a1974ff01300ea5dda097 --- /dev/null +++ b/L2L_starcoder_N3.1e08_D2.6e10_C4.8e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af49fb157153fcedf6d74e657a7a153bcac3dbce047c3b7333706a5b4f9767ec +size 1244778504 diff --git a/L2L_starcoder_N3.1e08_D5.4e09_C1.0e19/model.safetensors b/L2L_starcoder_N3.1e08_D5.4e09_C1.0e19/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16a2f1d3bb00cbe17d27947c30b3cc2f7ead5462 --- /dev/null +++ b/L2L_starcoder_N3.1e08_D5.4e09_C1.0e19/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df666f31bbdb2a999bd735ce1e69f5bf9da26b6dfee46c660b16817209eb6c01 +size 1244778504 diff --git a/L2L_starcoder_N3.5e07_D9.5e08_C2.0e17/model.safetensors b/L2L_starcoder_N3.5e07_D9.5e08_C2.0e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..089355291928b90dd60ad6d6ef4c9792e80ee998 --- /dev/null +++ b/L2L_starcoder_N3.5e07_D9.5e08_C2.0e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27f5050ad019a0d7a82e88e1eabe27b76f21fc2489c908ed413fa79752aaf88c +size 140815064 diff --git a/L2L_starcoder_N4.6e07_D7.7e09_C2.1e18/model.safetensors b/L2L_starcoder_N4.6e07_D7.7e09_C2.1e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..451b395007ba3b574108e7cf8d9d715af3f9a534 --- /dev/null +++ b/L2L_starcoder_N4.6e07_D7.7e09_C2.1e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a17d5fdd93366edcca87757c4bc0826f9e8d7145de73c02f87e4d9f3df4a744 +size 182182704 diff --git a/L2L_starcoder_N5.8e07_D2.8e09_C9.6e17/model.safetensors b/L2L_starcoder_N5.8e07_D2.8e09_C9.6e17/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..984f5d4f6ac201a4397530b574ca7fc7636ebad6 --- /dev/null +++ b/L2L_starcoder_N5.8e07_D2.8e09_C9.6e17/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1229e9782349056461dc38457c121cdf99fd44e8d968fb242e6a03a03cbea83f +size 231809928 diff --git a/L2L_starcoder_N9.0e07_D8.5e09_C4.6e18/model.safetensors b/L2L_starcoder_N9.0e07_D8.5e09_C4.6e18/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99b44c7b867c87dfc9e4801718a06a003034ccc8 --- /dev/null +++ b/L2L_starcoder_N9.0e07_D8.5e09_C4.6e18/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de71116ebf0213693e6b2d327ed0821a11eb3bc6f5224caad8534f73be049477 +size 360561776