smajumdar commited on
Commit
a540f7d
·
verified ·
1 Parent(s): f3ecefe

Push model using huggingface_hub.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. README.md +196 -0
  3. model_config.yaml +129 -0
  4. model_weights/common.pt +3 -0
  5. model_weights/metadata.json +1 -0
  6. model_weights/module.decoder.final_layernorm.bias/.zarray +14 -0
  7. model_weights/module.decoder.final_layernorm.bias/0 +0 -0
  8. model_weights/module.decoder.final_layernorm.weight/.zarray +14 -0
  9. model_weights/module.decoder.final_layernorm.weight/0 +1 -0
  10. model_weights/module.decoder.layers.mlp.linear_fc1._extra_state/shard_0_4.pt +3 -0
  11. model_weights/module.decoder.layers.mlp.linear_fc1._extra_state/shard_1_4.pt +3 -0
  12. model_weights/module.decoder.layers.mlp.linear_fc1._extra_state/shard_2_4.pt +3 -0
  13. model_weights/module.decoder.layers.mlp.linear_fc1._extra_state/shard_3_4.pt +3 -0
  14. model_weights/module.decoder.layers.mlp.linear_fc1.bias/.zarray +16 -0
  15. model_weights/module.decoder.layers.mlp.linear_fc1.bias/0.0 +0 -0
  16. model_weights/module.decoder.layers.mlp.linear_fc1.bias/1.0 +0 -0
  17. model_weights/module.decoder.layers.mlp.linear_fc1.bias/2.0 +0 -0
  18. model_weights/module.decoder.layers.mlp.linear_fc1.bias/3.0 +0 -0
  19. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/.zarray +16 -0
  20. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/0.0 +0 -0
  21. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/1.0 +0 -0
  22. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/2.0 +0 -0
  23. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/3.0 +0 -0
  24. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/.zarray +16 -0
  25. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/0.0 +1 -0
  26. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/1.0 +1 -0
  27. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/2.0 +1 -0
  28. model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/3.0 +1 -0
  29. model_weights/module.decoder.layers.mlp.linear_fc1.weight/.zarray +18 -0
  30. model_weights/module.decoder.layers.mlp.linear_fc1.weight/0.0.0 +0 -0
  31. model_weights/module.decoder.layers.mlp.linear_fc1.weight/1.0.0 +0 -0
  32. model_weights/module.decoder.layers.mlp.linear_fc1.weight/2.0.0 +0 -0
  33. model_weights/module.decoder.layers.mlp.linear_fc1.weight/3.0.0 +0 -0
  34. model_weights/module.decoder.layers.mlp.linear_fc2._extra_state/shard_0_4.pt +3 -0
  35. model_weights/module.decoder.layers.mlp.linear_fc2._extra_state/shard_1_4.pt +3 -0
  36. model_weights/module.decoder.layers.mlp.linear_fc2._extra_state/shard_2_4.pt +3 -0
  37. model_weights/module.decoder.layers.mlp.linear_fc2._extra_state/shard_3_4.pt +3 -0
  38. model_weights/module.decoder.layers.mlp.linear_fc2.bias/.zarray +16 -0
  39. model_weights/module.decoder.layers.mlp.linear_fc2.bias/0.0 +0 -0
  40. model_weights/module.decoder.layers.mlp.linear_fc2.bias/1.0 +0 -0
  41. model_weights/module.decoder.layers.mlp.linear_fc2.bias/2.0 +0 -0
  42. model_weights/module.decoder.layers.mlp.linear_fc2.bias/3.0 +0 -0
  43. model_weights/module.decoder.layers.mlp.linear_fc2.weight/.zarray +18 -0
  44. model_weights/module.decoder.layers.mlp.linear_fc2.weight/0.0.0 +0 -0
  45. model_weights/module.decoder.layers.mlp.linear_fc2.weight/1.0.0 +0 -0
  46. model_weights/module.decoder.layers.mlp.linear_fc2.weight/2.0.0 +0 -0
  47. model_weights/module.decoder.layers.mlp.linear_fc2.weight/3.0.0 +0 -0
  48. model_weights/module.decoder.layers.self_attention.linear_proj._extra_state/shard_0_4.pt +3 -0
  49. model_weights/module.decoder.layers.self_attention.linear_proj._extra_state/shard_1_4.pt +3 -0
  50. model_weights/module.decoder.layers.self_attention.linear_proj._extra_state/shard_2_4.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_weights/module.embedding.position_embeddings.weight/0.0 filter=lfs diff=lfs merge=lfs -text
37
+ model_weights/module.embedding.word_embeddings.weight/0.0 filter=lfs diff=lfs merge=lfs -text
38
+ model_weights/module.output_layer.weight/0.0 filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-4.0
3
+ library_name: nemo
4
+ tags:
5
+ - pytorch
6
+ - NeMo
7
+ ---
8
+
9
+ # Abc5
10
+
11
+ <style>
12
+ img {
13
+ display: inline;
14
+ }
15
+ </style>
16
+
17
+ [![Model architecture](https://img.shields.io/badge/Model_Arch-PUT-YOUR-ARCHITECTURE-HERE-lightgrey#model-badge)](#model-architecture)
18
+ | [![Model size](https://img.shields.io/badge/Params-PUT-YOUR-MODEL-SIZE-HERE-lightgrey#model-badge)](#model-architecture)
19
+ | [![Language](https://img.shields.io/badge/Language-PUT-YOUR-LANGUAGE-HERE-lightgrey#model-badge)](#datasets)
20
+
21
+ **Put a short model description here.**
22
+
23
+ See the [model architecture](#model-architecture) section and [NeMo documentation](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/index.html) for complete architecture details.
24
+
25
+
26
+ ## NVIDIA NeMo: Training
27
+
28
+ To train, fine-tune or play with the model you will need to install [NVIDIA NeMo](https://github.com/NVIDIA/NeMo). We recommend you install it after you've installed latest Pytorch version.
29
+ ```
30
+ pip install nemo_toolkit['all']
31
+ ```
32
+
33
+ ## How to Use this Model
34
+
35
+ The model is available for use in the NeMo toolkit [3], and can be used as a pre-trained checkpoint for inference or for fine-tuning on another dataset.
36
+
37
+ ### Automatically instantiate the model
38
+
39
+ **NOTE**: Please update the model class below to match the class of the model being uploaded.
40
+
41
+ ```python
42
+ from nemo.core import ModelPT
43
+ model = ModelPT.from_pretrained("smajumdar/abc5")
44
+ ```
45
+
46
+ ### NOTE
47
+
48
+ Add some information about how to use the model here. An example is provided for ASR inference below.
49
+
50
+ ### Transcribing using Python
51
+ First, let's get a sample
52
+ ```
53
+ wget https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav
54
+ ```
55
+ Then simply do:
56
+ ```
57
+ asr_model.transcribe(['2086-149220-0033.wav'])
58
+ ```
59
+
60
+ ### Transcribing many audio files
61
+
62
+ ```shell
63
+ python [NEMO_GIT_FOLDER]/examples/asr/transcribe_speech.py pretrained_name="smajumdar/abc5" audio_dir=""
64
+ ```
65
+
66
+ ### Input
67
+
68
+ **Add some information about what are the inputs to this model**
69
+
70
+ ### Output
71
+
72
+ **Add some information about what are the outputs of this model**
73
+
74
+ ## Model Architecture
75
+
76
+ **Add information here discussing architectural details of the model or any comments to users about the model.**
77
+
78
+ ## Training
79
+
80
+ **Add information here about how the model was trained. It should be as detailed as possible, potentially including the link to the script used to train as well as the base config used to train the model. If extraneous scripts are used to prepare the components of the model, please include them here.**
81
+
82
+ ### NOTE
83
+
84
+ An example is provided below for ASR
85
+
86
+ The NeMo toolkit [3] was used for training the models for over several hundred epochs. These model are trained with this [example script](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/asr_transducer/speech_to_text_rnnt_bpe.py) and this [base config](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml).
87
+
88
+ The tokenizers for these models were built using the text transcripts of the train set with this [script](https://github.com/NVIDIA/NeMo/blob/main/scripts/tokenizers/process_asr_text_tokenizer.py).
89
+
90
+
91
+ ### Datasets
92
+
93
+ **Try to provide as detailed a list of datasets as possible. If possible, provide links to the datasets on HF by adding it to the manifest section at the top of the README (marked by ---).**
94
+
95
+ ### NOTE
96
+
97
+ An example for the manifest section is provided below for ASR datasets
98
+
99
+ datasets:
100
+ - librispeech_asr
101
+ - fisher_corpus
102
+ - Switchboard-1
103
+ - WSJ-0
104
+ - WSJ-1
105
+ - National-Singapore-Corpus-Part-1
106
+ - National-Singapore-Corpus-Part-6
107
+ - vctk
108
+ - voxpopuli
109
+ - europarl
110
+ - multilingual_librispeech
111
+ - mozilla-foundation/common_voice_8_0
112
+ - MLCommons/peoples_speech
113
+
114
+ The corresponding text in this section for those datasets is stated below -
115
+
116
+ The model was trained on 64K hours of English speech collected and prepared by NVIDIA NeMo and Suno teams.
117
+
118
+ The training dataset consists of private subset with 40K hours of English speech plus 24K hours from the following public datasets:
119
+
120
+ - Librispeech 960 hours of English speech
121
+ - Fisher Corpus
122
+ - Switchboard-1 Dataset
123
+ - WSJ-0 and WSJ-1
124
+ - National Speech Corpus (Part 1, Part 6)
125
+ - VCTK
126
+ - VoxPopuli (EN)
127
+ - Europarl-ASR (EN)
128
+ - Multilingual Librispeech (MLS EN) - 2,000 hour subset
129
+ - Mozilla Common Voice (v7.0)
130
+ - People's Speech - 12,000 hour subset
131
+
132
+
133
+ ## Performance
134
+
135
+ **Add information here about the performance of the model. Discuss what is the metric that is being used to evaluate the model and if there are external links explaining the custom metric, please link to it.
136
+
137
+ ### NOTE
138
+
139
+ An example is provided below for ASR metrics list that can be added to the top of the README
140
+
141
+ model-index:
142
+ - name: PUT_MODEL_NAME
143
+ results:
144
+ - task:
145
+ name: Automatic Speech Recognition
146
+ type: automatic-speech-recognition
147
+ dataset:
148
+ name: AMI (Meetings test)
149
+ type: edinburghcstr/ami
150
+ config: ihm
151
+ split: test
152
+ args:
153
+ language: en
154
+ metrics:
155
+ - name: Test WER
156
+ type: wer
157
+ value: 17.10
158
+ - task:
159
+ name: Automatic Speech Recognition
160
+ type: automatic-speech-recognition
161
+ dataset:
162
+ name: Earnings-22
163
+ type: revdotcom/earnings22
164
+ split: test
165
+ args:
166
+ language: en
167
+ metrics:
168
+ - name: Test WER
169
+ type: wer
170
+ value: 14.11
171
+
172
+ Provide any caveats about the results presented in the top of the discussion so that nuance is not lost.
173
+
174
+ It should ideally be in a tabular format (you can use the following website to make your tables in markdown format - https://www.tablesgenerator.com/markdown_tables)**
175
+
176
+ ## Limitations
177
+
178
+ **Discuss any practical limitations to the model when being used in real world cases. They can also be legal disclaimers, or discussion regarding the safety of the model (particularly in the case of LLMs).**
179
+
180
+
181
+ ### Note
182
+
183
+ An example is provided below
184
+
185
+ Since this model was trained on publicly available speech datasets, the performance of this model might degrade for speech which includes technical terms, or vernacular that the model has not been trained on. The model might also perform worse for accented speech.
186
+
187
+
188
+ ## License
189
+
190
+ License to use this model is covered by the [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/). By downloading the public and release version of the model, you accept the terms and conditions of the [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/) license.
191
+
192
+ ## References
193
+
194
+ **Provide appropriate references in the markdown link format below. Please order them numerically.**
195
+
196
+ [1] [NVIDIA NeMo Toolkit](https://github.com/NVIDIA/NeMo)
model_config.yaml ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tensor_model_parallel_size: 1
2
+ pipeline_model_parallel_size: 1
3
+ virtual_pipeline_model_parallel_size: null
4
+ sequence_parallel: false
5
+ context_parallel_size: 1
6
+ expert_model_parallel_size: 1
7
+ moe_extended_tp: false
8
+ perform_initialization: true
9
+ use_cpu_initialization: false
10
+ fp16: false
11
+ bf16: false
12
+ params_dtype: float32
13
+ timers: null
14
+ finalize_model_grads_func: null
15
+ grad_scale_func: null
16
+ no_sync_func: null
17
+ grad_sync_func: null
18
+ param_sync_func: null
19
+ deterministic_mode: false
20
+ enable_autocast: false
21
+ autocast_dtype: float32
22
+ num_microbatches_with_partial_activation_checkpoints: null
23
+ gradient_accumulation_fusion: false
24
+ async_tensor_model_parallel_allreduce: false
25
+ use_te_rng_tracker: false
26
+ tp_comm_overlap: false
27
+ tp_comm_bulk_wgrad: true
28
+ tp_comm_bulk_dgrad: true
29
+ tp_comm_overlap_ag: true
30
+ tp_comm_overlap_rs: true
31
+ tp_comm_overlap_rs_dgrad: false
32
+ tp_comm_split_ag: true
33
+ tp_comm_atomic_ag: false
34
+ tp_comm_split_rs: true
35
+ tp_comm_atomic_rs: false
36
+ pipeline_dtype: null
37
+ variable_seq_lengths: false
38
+ overlap_p2p_comm: false
39
+ batch_p2p_comm: true
40
+ batch_p2p_sync: true
41
+ use_ring_exchange_p2p: false
42
+ deallocate_pipeline_outputs: false
43
+ defer_embedding_wgrad_compute: false
44
+ pipeline_model_parallel_split_rank: null
45
+ cpu_offloading: false
46
+ cpu_offloading_num_layers: 0
47
+ _cpu_offloading_context: null
48
+ cpu_offloading_activations: true
49
+ cpu_offloading_weights: true
50
+ barrier_with_L1_time: true
51
+ fp16_lm_cross_entropy: false
52
+ parallel_output: true
53
+ share_embeddings_and_output_weights: false
54
+ make_vocab_size_divisible_by: 128
55
+ position_embedding_type: learned_absolute
56
+ rotary_base: 10000
57
+ rotary_percent: 1.0
58
+ seq_len_interpolation_factor: null
59
+ seq_length: 2048
60
+ optim:
61
+ name: fused_adam
62
+ sched: null
63
+ optimizer_fn: null
64
+ tokenizer_filepath: null
65
+ num_layers: 4
66
+ hidden_size: 256
67
+ num_attention_heads: 4
68
+ num_query_groups: 4
69
+ ffn_hidden_size: 256
70
+ kv_channels: 64
71
+ hidden_dropout: 0.1
72
+ attention_dropout: 0.1
73
+ fp32_residual_connection: false
74
+ apply_residual_connection_post_layernorm: false
75
+ layernorm_epsilon: 1.0e-05
76
+ layernorm_zero_centered_gamma: false
77
+ add_bias_linear: true
78
+ add_qkv_bias: false
79
+ gated_linear_unit: false
80
+ activation_func: gelu
81
+ activation_func_fp8_input_store: false
82
+ num_moe_experts: null
83
+ rotary_interleaved: false
84
+ window_size: null
85
+ normalization: LayerNorm
86
+ qk_layernorm: false
87
+ test_mode: false
88
+ calculate_per_token_loss: false
89
+ init_method: init_
90
+ output_layer_init_method: init_
91
+ init_method_std: 0.02
92
+ apply_query_key_layer_scaling: false
93
+ attention_softmax_in_fp32: true
94
+ bias_activation_fusion: false
95
+ masked_softmax_fusion: false
96
+ persist_layer_norm: false
97
+ memory_efficient_layer_norm: false
98
+ bias_dropout_fusion: false
99
+ apply_rope_fusion: false
100
+ recompute_granularity: null
101
+ recompute_method: null
102
+ recompute_num_layers: null
103
+ distribute_saved_activations: null
104
+ fp8: null
105
+ fp8_margin: 0
106
+ fp8_interval: 1
107
+ fp8_amax_history_len: 1
108
+ fp8_amax_compute_algo: most_recent
109
+ fp8_wgrad: true
110
+ fp8_dot_product_attention: false
111
+ fp8_multi_head_attention: false
112
+ moe_router_load_balancing_type: aux_loss
113
+ moe_router_topk: 2
114
+ moe_grouped_gemm: false
115
+ moe_aux_loss_coeff: 0.0
116
+ moe_z_loss_coeff: null
117
+ moe_input_jitter_eps: null
118
+ moe_token_dropping: false
119
+ moe_token_dispatcher_type: allgather
120
+ moe_per_layer_logging: false
121
+ moe_expert_capacity_factor: null
122
+ moe_pad_expert_input_to_capacity: false
123
+ moe_token_drop_policy: probs
124
+ moe_layer_recompute: false
125
+ clone_scatter_output_in_embedding: true
126
+ disable_parameter_transpose_cache: false
127
+ enable_cuda_graph: false
128
+ target: nemo.collections.llm.gpt.model.base_v2.GPTModelV2
129
+ nemo_version: 2.0.0rc1
model_weights/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e4090fa34d96307127606cccef3ae99aedae58279e8bdf1746d44d3bf7aa47
3
+ size 860
model_weights/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "zarr", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model_weights/module.decoder.final_layernorm.bias/.zarray ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 256
4
+ ],
5
+ "compressor": null,
6
+ "dtype": "bfloat16",
7
+ "fill_value": null,
8
+ "filters": null,
9
+ "order": "C",
10
+ "shape": [
11
+ 256
12
+ ],
13
+ "zarr_format": 2
14
+ }
model_weights/module.decoder.final_layernorm.bias/0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.final_layernorm.weight/.zarray ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 256
4
+ ],
5
+ "compressor": null,
6
+ "dtype": "bfloat16",
7
+ "fill_value": null,
8
+ "filters": null,
9
+ "order": "C",
10
+ "shape": [
11
+ 256
12
+ ],
13
+ "zarr_format": 2
14
+ }
model_weights/module.decoder.final_layernorm.weight/0 ADDED
@@ -0,0 +1 @@
 
 
1
+ �?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?
model_weights/module.decoder.layers.mlp.linear_fc1._extra_state/shard_0_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe8cf89ac8228df7c20a5fbac2a50c841310072585016f109c1955934c30a0f
3
+ size 1832
model_weights/module.decoder.layers.mlp.linear_fc1._extra_state/shard_1_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0399cdab9deadf27e09a92b0fffb8a3c3ba32d2d100bbeaf41c8056257c338
3
+ size 1832
model_weights/module.decoder.layers.mlp.linear_fc1._extra_state/shard_2_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4195c2ae65e03ab3843c91ed7bca9cd02ca971de54785f765564867b3ba53e07
3
+ size 1832
model_weights/module.decoder.layers.mlp.linear_fc1._extra_state/shard_3_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc0f2a0c549c38845a9e549180085c1e05916eb0fc2eef084e3411c67b1379b
3
+ size 1832
model_weights/module.decoder.layers.mlp.linear_fc1.bias/.zarray ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 1,
4
+ 256
5
+ ],
6
+ "compressor": null,
7
+ "dtype": "bfloat16",
8
+ "fill_value": null,
9
+ "filters": null,
10
+ "order": "C",
11
+ "shape": [
12
+ 4,
13
+ 256
14
+ ],
15
+ "zarr_format": 2
16
+ }
model_weights/module.decoder.layers.mlp.linear_fc1.bias/0.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.bias/1.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.bias/2.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.bias/3.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/.zarray ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 1,
4
+ 256
5
+ ],
6
+ "compressor": null,
7
+ "dtype": "bfloat16",
8
+ "fill_value": null,
9
+ "filters": null,
10
+ "order": "C",
11
+ "shape": [
12
+ 4,
13
+ 256
14
+ ],
15
+ "zarr_format": 2
16
+ }
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/0.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/1.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/2.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_bias/3.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/.zarray ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 1,
4
+ 256
5
+ ],
6
+ "compressor": null,
7
+ "dtype": "bfloat16",
8
+ "fill_value": null,
9
+ "filters": null,
10
+ "order": "C",
11
+ "shape": [
12
+ 4,
13
+ 256
14
+ ],
15
+ "zarr_format": 2
16
+ }
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/0.0 ADDED
@@ -0,0 +1 @@
 
 
1
+ �?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/1.0 ADDED
@@ -0,0 +1 @@
 
 
1
+ �?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/2.0 ADDED
@@ -0,0 +1 @@
 
 
1
+ �?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?
model_weights/module.decoder.layers.mlp.linear_fc1.layer_norm_weight/3.0 ADDED
@@ -0,0 +1 @@
 
 
1
+ �?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?�?
model_weights/module.decoder.layers.mlp.linear_fc1.weight/.zarray ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 1,
4
+ 256,
5
+ 256
6
+ ],
7
+ "compressor": null,
8
+ "dtype": "bfloat16",
9
+ "fill_value": null,
10
+ "filters": null,
11
+ "order": "C",
12
+ "shape": [
13
+ 4,
14
+ 256,
15
+ 256
16
+ ],
17
+ "zarr_format": 2
18
+ }
model_weights/module.decoder.layers.mlp.linear_fc1.weight/0.0.0 ADDED
Binary file (131 kB). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.weight/1.0.0 ADDED
Binary file (131 kB). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.weight/2.0.0 ADDED
Binary file (131 kB). View file
 
model_weights/module.decoder.layers.mlp.linear_fc1.weight/3.0.0 ADDED
Binary file (131 kB). View file
 
model_weights/module.decoder.layers.mlp.linear_fc2._extra_state/shard_0_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe8cf89ac8228df7c20a5fbac2a50c841310072585016f109c1955934c30a0f
3
+ size 1832
model_weights/module.decoder.layers.mlp.linear_fc2._extra_state/shard_1_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0399cdab9deadf27e09a92b0fffb8a3c3ba32d2d100bbeaf41c8056257c338
3
+ size 1832
model_weights/module.decoder.layers.mlp.linear_fc2._extra_state/shard_2_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4195c2ae65e03ab3843c91ed7bca9cd02ca971de54785f765564867b3ba53e07
3
+ size 1832
model_weights/module.decoder.layers.mlp.linear_fc2._extra_state/shard_3_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc0f2a0c549c38845a9e549180085c1e05916eb0fc2eef084e3411c67b1379b
3
+ size 1832
model_weights/module.decoder.layers.mlp.linear_fc2.bias/.zarray ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 1,
4
+ 256
5
+ ],
6
+ "compressor": null,
7
+ "dtype": "bfloat16",
8
+ "fill_value": null,
9
+ "filters": null,
10
+ "order": "C",
11
+ "shape": [
12
+ 4,
13
+ 256
14
+ ],
15
+ "zarr_format": 2
16
+ }
model_weights/module.decoder.layers.mlp.linear_fc2.bias/0.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc2.bias/1.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc2.bias/2.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc2.bias/3.0 ADDED
Binary file (512 Bytes). View file
 
model_weights/module.decoder.layers.mlp.linear_fc2.weight/.zarray ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 1,
4
+ 256,
5
+ 256
6
+ ],
7
+ "compressor": null,
8
+ "dtype": "bfloat16",
9
+ "fill_value": null,
10
+ "filters": null,
11
+ "order": "C",
12
+ "shape": [
13
+ 4,
14
+ 256,
15
+ 256
16
+ ],
17
+ "zarr_format": 2
18
+ }
model_weights/module.decoder.layers.mlp.linear_fc2.weight/0.0.0 ADDED
Binary file (131 kB). View file
 
model_weights/module.decoder.layers.mlp.linear_fc2.weight/1.0.0 ADDED
Binary file (131 kB). View file
 
model_weights/module.decoder.layers.mlp.linear_fc2.weight/2.0.0 ADDED
Binary file (131 kB). View file
 
model_weights/module.decoder.layers.mlp.linear_fc2.weight/3.0.0 ADDED
Binary file (131 kB). View file
 
model_weights/module.decoder.layers.self_attention.linear_proj._extra_state/shard_0_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe8cf89ac8228df7c20a5fbac2a50c841310072585016f109c1955934c30a0f
3
+ size 1832
model_weights/module.decoder.layers.self_attention.linear_proj._extra_state/shard_1_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0399cdab9deadf27e09a92b0fffb8a3c3ba32d2d100bbeaf41c8056257c338
3
+ size 1832
model_weights/module.decoder.layers.self_attention.linear_proj._extra_state/shard_2_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4195c2ae65e03ab3843c91ed7bca9cd02ca971de54785f765564867b3ba53e07
3
+ size 1832