ibivibiv committed on
Commit
1f2fcfa
·
verified ·
1 Parent(s): 2c9b7ef

Upload MixtralForCausalLM

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. config.json +34 -0
  2. generation_config.json +7 -0
  3. model-00001-of-00108.safetensors +3 -0
  4. model-00002-of-00108.safetensors +3 -0
  5. model-00003-of-00108.safetensors +3 -0
  6. model-00004-of-00108.safetensors +3 -0
  7. model-00005-of-00108.safetensors +3 -0
  8. model-00006-of-00108.safetensors +3 -0
  9. model-00007-of-00108.safetensors +3 -0
  10. model-00008-of-00108.safetensors +3 -0
  11. model-00009-of-00108.safetensors +3 -0
  12. model-00010-of-00108.safetensors +3 -0
  13. model-00011-of-00108.safetensors +3 -0
  14. model-00012-of-00108.safetensors +3 -0
  15. model-00013-of-00108.safetensors +3 -0
  16. model-00014-of-00108.safetensors +3 -0
  17. model-00015-of-00108.safetensors +3 -0
  18. model-00016-of-00108.safetensors +3 -0
  19. model-00017-of-00108.safetensors +3 -0
  20. model-00018-of-00108.safetensors +3 -0
  21. model-00019-of-00108.safetensors +3 -0
  22. model-00020-of-00108.safetensors +3 -0
  23. model-00021-of-00108.safetensors +3 -0
  24. model-00022-of-00108.safetensors +3 -0
  25. model-00023-of-00108.safetensors +3 -0
  26. model-00024-of-00108.safetensors +3 -0
  27. model-00025-of-00108.safetensors +3 -0
  28. model-00026-of-00108.safetensors +3 -0
  29. model-00027-of-00108.safetensors +3 -0
  30. model-00028-of-00108.safetensors +3 -0
  31. model-00029-of-00108.safetensors +3 -0
  32. model-00030-of-00108.safetensors +3 -0
  33. model-00031-of-00108.safetensors +3 -0
  34. model-00032-of-00108.safetensors +3 -0
  35. model-00033-of-00108.safetensors +3 -0
  36. model-00034-of-00108.safetensors +3 -0
  37. model-00035-of-00108.safetensors +3 -0
  38. model-00036-of-00108.safetensors +3 -0
  39. model-00037-of-00108.safetensors +3 -0
  40. model-00038-of-00108.safetensors +3 -0
  41. model-00039-of-00108.safetensors +3 -0
  42. model-00040-of-00108.safetensors +3 -0
  43. model-00041-of-00108.safetensors +3 -0
  44. model-00042-of-00108.safetensors +3 -0
  45. model-00043-of-00108.safetensors +3 -0
  46. model-00044-of-00108.safetensors +3 -0
  47. model-00045-of-00108.safetensors +3 -0
  48. model-00046-of-00108.safetensors +3 -0
  49. model-00047-of-00108.safetensors +3 -0
  50. model-00048-of-00108.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./orthorusv2",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 8192,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 28672,
14
+ "max_position_embeddings": 32764,
15
+ "model_type": "mixtral",
16
+ "num_attention_heads": 64,
17
+ "num_experts_per_tok": 2,
18
+ "num_hidden_layers": 80,
19
+ "num_key_value_heads": 8,
20
+ "num_local_experts": 2,
21
+ "output_router_logits": false,
22
+ "pad_token_id": 0,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 1000000,
27
+ "router_aux_loss_coef": 0.001,
28
+ "sliding_window": null,
29
+ "tie_word_embeddings": false,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.38.1",
32
+ "use_cache": true,
33
+ "vocab_size": 32000
34
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.38.1"
7
+ }
model-00001-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0100d803090eacdb640862d28d7f80f53be5bdad23718d3996e6bd8a6f68e60f
3
+ size 4471194712
model-00002-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93ac09687c714e365c66360c3c575a7a9e941eae9c1f047c71981641c95c1162
3
+ size 4362208592
model-00003-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e50c71ee2d285c609d5800019ef978ee5f802ae7ba691d7a14806a01fa856202
3
+ size 4999677048
model-00004-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afae17bf1c46a49244ae33b00d86037b96056fc452f64ee0a4e81d9003b4e6c0
3
+ size 4999676936
model-00005-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af6374254953d6b1da52c05c9cc183476c939fbdaa652663f96bcc79fd1471b1
3
+ size 4362208592
model-00006-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b25176cb8a40197c8846d7365bf0f37e18020c832979e0ca2e456ff1f3d05d1
3
+ size 4362208592
model-00007-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f826881bc47abe7b932339f362554c25ab654c24ec483fc1aa2a43d2c2b4f4
3
+ size 4999677048
model-00008-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0ab46028ed9559cd496d8aec79a8592013b911514492e967269db06e9c958c
3
+ size 4999676936
model-00009-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba5cd25e75c005af6350b38de8f3bc4188557ba0ac6e4f1cb3b181803a8201a3
3
+ size 4362208592
model-00010-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d2f45faa250ab4802b35ce48f75bb104c5afb4a05004b5d17987a054062cac
3
+ size 4362208592
model-00011-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:579e40c0a856f4c7e644dc08f5c65d963e8ea3c9d7da2ef8b42c2b8c50155a3b
3
+ size 4999677048
model-00012-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf97d2d5a9b3e3e5212060ea9fd5366b04a698307c6e145b642fbddeea8a0843
3
+ size 4999676936
model-00013-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e42bfbd81ebbe42e2f2752a25d5f7ac0e2c3e99a424680f06cd9e09291d63c
3
+ size 4362208592
model-00014-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e31cb66328e7acd8f2749b9b7f7889bba1efcc7bc7550a22eaa9ec5fb8b6ab72
3
+ size 4362208600
model-00015-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3827aab56aa9729b81397dc01fbecfbfe275699992811c0c6b95ad03f85845f8
3
+ size 4999677056
model-00016-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ae83afa1fb13cbee2dfc53080949f6c1e64bc9c19e69e1c7c548c2fecb7aa9
3
+ size 4999676944
model-00017-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d90d3985211596b6a15d970e2b064b0b31abdf0034645a7ee506a15f70a9748c
3
+ size 4362208600
model-00018-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da447010f8d3cae8ce921f43775348c34b7564f1559115baadca48b4c3d28e32
3
+ size 4362208608
model-00019-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0bda91960c7592831251c3aff81a8762fa8716e660b92d71ea2f5fe9a7da02e
3
+ size 4999677056
model-00020-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe0f7d83b17c96db82d52d1ad1b505c054dc2c09f502e945331f23419cc2d2f
3
+ size 4999676944
model-00021-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6651e72371f30581871098594b99df21a679a9f755b35223f45d6cf038d1052a
3
+ size 4362208600
model-00022-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a218e627d7d83b08dd4ab05d8f79a8a46874d29af461680a895c566c45dd992f
3
+ size 4362208608
model-00023-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a928504045c222fad9b29c7c6da6b19a0b20b2b78600c79410cb83dae0fe56
3
+ size 4999677056
model-00024-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15257aeaa4e150000f9e9ea50d6e46f12fab8331f21f70c9d10db132d29d698f
3
+ size 4999676944
model-00025-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d45ced27708908994007e04d63f605fcf2f3682d48bb81851b70d750006d98b
3
+ size 4362208600
model-00026-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0dd1664e051f88f3f1243bd0a595de2bab7734b4fde3caaa7bcf03bf6873629
3
+ size 4362208608
model-00027-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:744b7719cb9bf7105d300aef697c7835a821f0f6eec1c77a4fd8035f9ee4eb61
3
+ size 4999677056
model-00028-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b19067a2f371696eae1cff0ab8e0f7411b62bcbbdd24ec418fe6e3828957c78
3
+ size 4999676944
model-00029-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541c26680f7bf40ea734446dd843dd19fc7e3bc38951c472afa2305bbcfd52c2
3
+ size 4362208600
model-00030-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b6785d6f19fed608b3ba4d510e04b262e8ecda3fc3bc02bc4ce66abcb7b3a0
3
+ size 4362208608
model-00031-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e74a2efac4261f9aaa98d13c3db97ceff1eb4883fe6ebf88b3f6b1c56cf49c8a
3
+ size 4999677056
model-00032-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53d2563eeb791effbf6f5f8db7f3c61e69f1bf22d7b89ca6343fefb1c665eddb
3
+ size 4999676944
model-00033-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85fe3e8417e52c4ee99b81bae702e2bd299c676f0a1f08d9d612b2d7db4a9f25
3
+ size 4362208600
model-00034-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9caafdeeced4c833e43f431cc73eea4afb3061ad1038ca9420f63fded5adea3f
3
+ size 4362208608
model-00035-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574717b3285d494b7162e4e7a3699596400ce9da7fc3367c3bcf3fd33941f70b
3
+ size 4999677056
model-00036-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bcfa538eeca53c436169ae62eb95b91496bf5fdf555b8ce98207948e1d27fac
3
+ size 4999676944
model-00037-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5638962f56c858239b2eaae849a8f01d672e31a570ee2ed403a5f1859ff0ba1a
3
+ size 4362208600
model-00038-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a0463de19c9870da62a79fbc6135e2acd1f2b675174ce458fc7d274e763f5f
3
+ size 4362208608
model-00039-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de571a4ef8a661cf940d5ccb61811e1e650774ca82956dcd237289e112c3b12
3
+ size 4999677056
model-00040-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d9fbf61bd3424f2e93eed8ed0c532756ab5047174b461f8f3970ce97638acd1
3
+ size 4999676944
model-00041-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:767b47a279526c319795b844d9554c28c0eb8b89910d18fdcef3acd3bbc36cf5
3
+ size 4362208600
model-00042-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851508347bc8753b25b62f93bd4c2285e5c4f45236daef2dd5c03d5771266988
3
+ size 4362208608
model-00043-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:951c171d17bc372e9faf1761e4c890323c820485f1cbd357865449b635d37491
3
+ size 4999677056
model-00044-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4011ed88e29b4299585071908e668c2da49c5bdb15f7d656120e098015b99948
3
+ size 4999676944
model-00045-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f28143f0fda45a8c46c322cb74c01b71cb7aaf2117e997838aa305f35b0bb8e
3
+ size 4362208600
model-00046-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8530aeb0538e37627e777b7286cdbf0fe41288d9b306185dda154c4956fe2ca1
3
+ size 4362208608
model-00047-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bceba555e472762a150edc32f0bdaac5568e079fe2d664ac1b66d43cc15dcb0e
3
+ size 4999677056
model-00048-of-00108.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be66ef8ff2be7955828d74eab73b707ebc75d4f79d52393348f4eb5be4267c0
3
+ size 4999676944