michaelfeil committed on
Commit
766b1f3
·
1 Parent(s): a67419b

Upload intfloat/e5-large ctranslate fp16 weights

Browse files
Files changed (4) hide show
  1. README.md +8 -11
  2. model.bin +2 -2
  3. modules.json +20 -0
  4. sentence_bert_config.json +4 -0
README.md CHANGED
@@ -2608,20 +2608,17 @@ Speedup inference while reducing memory by 2x-4x using int8 inference in C++ on
2608
 
2609
  quantized version of [intfloat/e5-large](https://huggingface.co/intfloat/e5-large)
2610
  ```bash
2611
- pip install hf-hub-ctranslate2>=2.10.0 ctranslate2>=3.16.0
2612
  ```
2613
 
2614
  ```python
2615
  # from transformers import AutoTokenizer
2616
  model_name = "michaelfeil/ct2fast-e5-large"
2617
 
2618
- from hf_hub_ctranslate2 import EncoderCT2fromHfHub
2619
- model = EncoderCT2fromHfHub(
2620
- # load in int8 on CUDA
2621
- model_name_or_path=model_name,
2622
- device="cuda",
2623
- compute_type="float16",
2624
- # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
2625
  )
2626
  embeddings = model.encode(
2627
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
@@ -2635,13 +2632,13 @@ scores = (embeddings @ embeddings.T) * 100
2635
  ```
2636
 
2637
  Checkpoint compatible with [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
2638
- and [hf-hub-ctranslate2>=2.10.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
2639
  - `compute_type=int8_float16` for `device="cuda"`
2640
  - `compute_type=int8` for `device="cpu"`
2641
 
2642
- Converted on 2023-06-16 using
2643
  ```
2644
- ct2-transformers-converter --model intfloat/e5-large --output_dir ~/tmp-ct2fast-e5-large --force --copy_files tokenizer.json README.md tokenizer_config.json vocab.txt special_tokens_map.json .gitattributes --quantization float16 --trust_remote_code
2645
  ```
2646
 
2647
  # Licence and other remarks:
 
2608
 
2609
  quantized version of [intfloat/e5-large](https://huggingface.co/intfloat/e5-large)
2610
  ```bash
2611
+ pip install "hf-hub-ctranslate2>=3.0.0" "ctranslate2>=3.16.0"
2612
  ```
2613
 
2614
  ```python
2615
  # from transformers import AutoTokenizer
2616
  model_name = "michaelfeil/ct2fast-e5-large"
2617
 
2618
+ from hf_hub_ctranslate2 import CT2SentenceTransformer
2619
+ model = CT2SentenceTransformer(
2620
+ model_name, compute_type="int8_float16", device="cuda",
2621
+ repo_contains_ct2=True
 
 
 
2622
  )
2623
  embeddings = model.encode(
2624
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
 
2632
  ```
2633
 
2634
  Checkpoint compatible with [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
2635
+ and [hf-hub-ctranslate2>=3.0.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
2636
  - `compute_type=int8_float16` for `device="cuda"`
2637
  - `compute_type=int8` for `device="cpu"`
2638
 
2639
+ Converted on 2023-06-18 using
2640
  ```
2641
+ ct2-transformers-converter --model intfloat/e5-large --output_dir ~/tmp-ct2fast-e5-large --force --copy_files tokenizer.json sentence_bert_config.json README.md modules.json special_tokens_map.json vocab.txt tokenizer_config.json .gitattributes --trust_remote_code
2642
  ```
2643
 
2644
  # Licence and other remarks:
model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86051c2d239ce41cb82c1ab7d6100db24aabcd903fa98b86415d5181c1d4a679
3
- size 670300108
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6efb605f25ccb63b543ebf025cf1db6f66fcd0dbdd665fcfe478b7baae85b321
3
+ size 1340583884
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }