aapot committed on
Commit
54f351c
·
1 Parent(s): 53dbb7e

Add onnx model

Browse files
.gitattributes CHANGED
@@ -14,6 +14,7 @@
14
  *.npy filter=lfs diff=lfs merge=lfs -text
15
  *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
14
  *.npy filter=lfs diff=lfs merge=lfs -text
15
  *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
18
  *.ot filter=lfs diff=lfs merge=lfs -text
19
  *.parquet filter=lfs diff=lfs merge=lfs -text
20
  *.pb filter=lfs diff=lfs merge=lfs -text
 
34
  *.zip filter=lfs diff=lfs merge=lfs -text
35
  *.zst filter=lfs diff=lfs merge=lfs -text
36
  *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
Constant_685_attr__value ADDED
Binary file (65.6 kB). View file
 
bgem3_model.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modified from https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/BGE_M3/modeling.py
2
+
3
+ import os
4
+ import torch
5
+ from torch import nn, Tensor
6
+ from transformers import AutoModel, AutoConfig
7
+ from huggingface_hub import snapshot_download
8
+ from typing import Dict
9
+
10
+
11
class BGEM3InferenceModel(nn.Module):
    """Inference wrapper returning dense, sparse and ColBERT outputs from one encoder pass.

    The module holds a pretrained encoder loaded with ``AutoModel`` and two
    linear heads (``colbert_linear`` and ``sparse_linear``) whose weights are
    read from ``colbert_linear.pt`` and ``sparse_linear.pt`` in the downloaded
    snapshot directory.
    """

    def __init__(
        self,
        model_name: str = "BAAI/bge-m3",
        colbert_dim: int = -1,
    ) -> None:
        """Download the checkpoint files and build the encoder and both heads.

        Args:
            model_name: Repository id handed to ``snapshot_download``.
            colbert_dim: Output width of the ColBERT head. ``-1`` means
                "use the encoder's hidden size".
        """
        super().__init__()

        # Only the files listed here are requested from the repository.
        model_dir = snapshot_download(
            repo_id=model_name,
            allow_patterns=[
                "model.safetensors",
                "colbert_linear.pt",
                "sparse_linear.pt",
                "config.json",
            ],
        )

        self.config = AutoConfig.from_pretrained(model_dir)
        self.model = AutoModel.from_pretrained(model_dir)

        hidden_size = self.model.config.hidden_size
        self.colbert_linear = torch.nn.Linear(
            in_features=hidden_size,
            out_features=hidden_size if colbert_dim == -1 else colbert_dim,
        )
        self.sparse_linear = torch.nn.Linear(in_features=hidden_size, out_features=1)

        # The head checkpoints are plain state dicts, so restrict unpickling to
        # tensors/containers instead of executing arbitrary pickled objects
        # from a downloaded file.
        colbert_state_dict = torch.load(
            os.path.join(model_dir, "colbert_linear.pt"),
            map_location="cpu",
            weights_only=True,
        )
        sparse_state_dict = torch.load(
            os.path.join(model_dir, "sparse_linear.pt"),
            map_location="cpu",
            weights_only=True,
        )
        # NOTE(review): a ``colbert_dim`` that differs from the width stored in
        # colbert_linear.pt will presumably make load_state_dict raise on the
        # shape mismatch -- confirm before relying on a custom value.
        self.colbert_linear.load_state_dict(colbert_state_dict)
        self.sparse_linear.load_state_dict(sparse_state_dict)

    def dense_embedding(self, last_hidden_state: Tensor) -> Tensor:
        """Return the hidden state at sequence position 0 for every batch item."""
        return last_hidden_state[:, 0]

    def sparse_embedding(self, last_hidden_state: Tensor) -> Tensor:
        """Return ``relu(sparse_linear(h))``: one non-negative value per position.

        The last dimension has size 1 because ``sparse_linear`` has a single
        output feature. Gradients are disabled.
        """
        with torch.no_grad():
            return torch.relu(self.sparse_linear(last_hidden_state))

    def colbert_embedding(
        self, last_hidden_state: Tensor, attention_mask: Tensor
    ) -> Tensor:
        """Project every position except the first and apply the attention mask.

        Position 0 (the one used by ``dense_embedding``) is dropped from both
        the hidden states and the mask; the projected vectors are then
        multiplied by the mask cast to float. Gradients are disabled.
        """
        with torch.no_grad():
            colbert_vecs = self.colbert_linear(last_hidden_state[:, 1:])
            # Broadcast the mask over the feature axis; positions whose mask
            # value is 0 become zero vectors.
            colbert_vecs = colbert_vecs * attention_mask[:, 1:][:, :, None].float()
            return colbert_vecs

    def forward(self, input_ids: Tensor, attention_mask: Tensor) -> Dict[str, Tensor]:
        """Run the encoder once and derive all three outputs.

        Returns:
            A dict with keys ``"dense_vecs"`` (normalized along the last axis),
            ``"sparse_vecs"`` (not normalized) and ``"colbert_vecs"``
            (normalized along the last axis).
        """
        with torch.no_grad():
            last_hidden_state = self.model(
                input_ids=input_ids, attention_mask=attention_mask, return_dict=True
            ).last_hidden_state

        output = {}
        dense_vecs = self.dense_embedding(last_hidden_state)
        output["dense_vecs"] = torch.nn.functional.normalize(dense_vecs, dim=-1)

        sparse_vecs = self.sparse_embedding(last_hidden_state)
        output["sparse_vecs"] = sparse_vecs

        colbert_vecs = self.colbert_embedding(last_hidden_state, attention_mask)
        output["colbert_vecs"] = torch.nn.functional.normalize(colbert_vecs, dim=-1)

        return output
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": ".",
3
+ "architectures": [
4
+ "XLMRobertaModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 8194,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 24,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.37.2",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 250002
28
+ }
export_onnx.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import argparse
3
+ from optimum.exporters.onnx import onnx_export_from_model
4
+ from collections import OrderedDict
5
+ from typing import Dict
6
+ from optimum.exporters.onnx.model_configs import XLMRobertaOnnxConfig
7
+ from bgem3_model import BGEM3InferenceModel
8
+
9
+
10
class BGEM3OnnxConfig(XLMRobertaOnnxConfig):
    @property
    def outputs(self) -> Dict[str, Dict[int, str]]:
        """
        Axis definitions for the three output tensors of the exported model.

        A new ordered mapping is built on every access, so callers may mutate
        the result freely.

        Returns:
            `Dict[str, Dict[int, str]]`: A mapping of each output name to a mapping of axis position to the axes symbolic name.
        """
        axes = OrderedDict()
        axes["dense_vecs"] = {0: "batch_size", 1: "embedding"}
        axes["sparse_vecs"] = {0: "batch_size", 1: "token", 2: "weight"}
        axes["colbert_vecs"] = {0: "batch_size", 1: "token", 2: "embedding"}
        return axes
28
+
29
+
30
def main(output: str, opset: int, device: str, optimize: str, atol: float) -> None:
    """Build the BGE-M3 inference model and export it to ONNX.

    Args:
        output: Directory where the generated ONNX model is stored.
        opset: ONNX opset version; ``None`` is forwarded unchanged so the
            exporter picks its default.
        device: Device used for the export (the CLI default is ``"cpu"``).
        optimize: Optimization level name (``"O1"``..``"O4"``) or ``None``.
        atol: Absolute tolerance used when validating the exported model, or
            ``None`` to let the exporter choose. The CLI parses this value as
            a float.
    """
    model = BGEM3InferenceModel()
    # The custom config declares the three named outputs of the wrapper model.
    bgem3_onnx_config = BGEM3OnnxConfig(model.config)
    onnx_export_from_model(
        model,
        output=output,
        task="feature-extraction",
        custom_onnx_configs={"model": bgem3_onnx_config},
        opset=opset,
        optimize=optimize,
        atol=atol,
        device=device,
    )
43
+
44
+
45
if __name__ == "__main__":
    # Command-line entry point: parse the export options and forward them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--output",
        type=str,
        # Without a destination the export cannot succeed; fail at parse time
        # instead of after the model has been downloaded and loaded.
        required=True,
        help="Path indicating the directory where to store the generated ONNX model.",
    )
    parser.add_argument(
        "--opset",
        type=int,
        default=None,
        help="If specified, ONNX opset version to export the model with. Otherwise, the default opset for the given model architecture will be used.",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cpu",
        help='The device to use to do the export. Defaults to "cpu".',
    )
    parser.add_argument(
        "--optimize",
        type=str,
        default=None,
        choices=["O1", "O2", "O3", "O4"],
        help=(
            "Allows to run ONNX Runtime optimizations directly during the export. Some of these optimizations are specific to ONNX Runtime, and the resulting ONNX will not be usable with other runtime as OpenVINO or TensorRT. Possible options:\n"
            "    - O1: Basic general optimizations\n"
            "    - O2: Basic and extended general optimizations, transformers-specific fusions\n"
            "    - O3: Same as O2 with GELU approximation\n"
            "    - O4: Same as O3 with mixed precision (fp16, GPU-only, requires `--device cuda`)"
        ),
    )
    parser.add_argument(
        "--atol",
        type=float,
        default=None,
        help="If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used.",
    )
    args = parser.parse_args()

    main(args.output, args.opset, args.device, args.optimize, args.atol)
model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:138d09ae2920b7e8731f01cba6b5ad996fd64bdfe34971e2d22ecbcf322e25b1
3
+ size 108605
model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa52ffdb65db76612d6c3ad92130221822f613004113e8c0af18c5eab81a81d
3
+ size 2271088656
ort_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "one_external_file": true,
3
+ "opset": null,
4
+ "optimization": {
5
+ "disable_attention": null,
6
+ "disable_attention_fusion": false,
7
+ "disable_bias_gelu": null,
8
+ "disable_bias_gelu_fusion": false,
9
+ "disable_bias_skip_layer_norm": null,
10
+ "disable_bias_skip_layer_norm_fusion": false,
11
+ "disable_embed_layer_norm": true,
12
+ "disable_embed_layer_norm_fusion": true,
13
+ "disable_gelu": null,
14
+ "disable_gelu_fusion": false,
15
+ "disable_group_norm_fusion": true,
16
+ "disable_layer_norm": null,
17
+ "disable_layer_norm_fusion": false,
18
+ "disable_packed_kv": true,
19
+ "disable_rotary_embeddings": false,
20
+ "disable_shape_inference": true,
21
+ "disable_skip_layer_norm": null,
22
+ "disable_skip_layer_norm_fusion": false,
23
+ "enable_gelu_approximation": false,
24
+ "enable_gemm_fast_gelu_fusion": false,
25
+ "enable_transformers_specific_optimizations": true,
26
+ "fp16": false,
27
+ "no_attention_mask": false,
28
+ "optimization_level": 2,
29
+ "optimize_for_gpu": false,
30
+ "optimize_with_onnxruntime_only": null,
31
+ "use_mask_index": false,
32
+ "use_multi_head_attention": false,
33
+ "use_raw_attention_mask": false
34
+ },
35
+ "optimum_version": "1.17.0",
36
+ "quantization": {},
37
+ "transformers_version": "4.37.2",
38
+ "use_external_data_format": true
39
+ }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ accelerate==0.27.2
2
+ huggingface-hub==0.20.3
3
+ onnx==1.15.0
4
+ onnxruntime==1.17.0
5
+ optimum==1.17.0
6
+ torch==2.2.0
7
+ transformers==4.37.2
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6710678b12670bc442b99edc952c4d996ae309a7020c1fa0096dd245c2faf790
3
+ size 17082821
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 8192,
50
+ "pad_token": "<pad>",
51
+ "sep_token": "</s>",
52
+ "sp_model_kwargs": {},
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }