zpn Xenova HF staff committed on
Commit
bb0033c
1 Parent(s): 5960f15

Improve Transformers.js code snippet (#6)

Browse files

- Improve Transformers.js code snippet (a6809227ef568c6f8a14a2226ba18e4fa42c776f)
- Upload README.md (0bab91d33ed6f45be1e858d2b0a15e50478cccd4)


Co-authored-by: Joshua <[email protected]>

Files changed (1) hide show
  1. README.md +27 -26
README.md CHANGED
@@ -5,6 +5,7 @@ tags:
5
  - feature-extraction
6
  - sentence-similarity
7
  - mteb
 
8
  model-index:
9
  - name: binarize_False
10
  results:
@@ -3083,38 +3084,38 @@ Note the small differences compared to the full 768-dimensional similarities.
3083
 
3084
  ### Transformers.js
3085
 
3086
- ```javascript
3087
- import { pipeline } from '@xenova/transformers';
3088
-
3089
- // Create a feature extraction pipeline
3090
- const extractor = await pipeline('feature-extraction', 'nomic-ai/modernbert-embed-base', {
3091
- quantized: false, // Comment out this line to use the quantized version
3092
- });
3093
-
3094
- // Compute sentence embeddings
3095
- const texts = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'];
3096
- const embeddings = await extractor(texts, { pooling: 'mean', normalize: true });
3097
- console.log(embeddings);
3098
  ```
3099
 
3100
- <details><summary>Click to see Transformers.js usage with different quantizations</summary>
3101
 
3102
  ```javascript
3103
- import { pipeline } from '@xenova/transformers';
3104
 
3105
  // Create a feature extraction pipeline
3106
- const extractor = await pipeline('feature-extraction', 'nomic-ai/modernbert-embed-base', {
3107
- dtype: 'q4f16',
3108
- });
3109
-
3110
- // Compute sentence embeddings
3111
- const texts = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'];
3112
- const embeddings = await extractor(texts, { pooling: 'mean', normalize: true });
3113
- console.log(embeddings);
3114
- ```
3115
-
3116
- </details>
 
 
 
 
 
3117
 
 
 
 
 
3118
 
3119
 
3120
  ## Training
@@ -3152,4 +3153,4 @@ If you find the model, dataset, or training code useful, please cite our work
3152
  archivePrefix={arXiv},
3153
  primaryClass={cs.CL}
3154
  }
3155
- ```
 
5
  - feature-extraction
6
  - sentence-similarity
7
  - mteb
8
+ - transformers.js
9
  model-index:
10
  - name: binarize_False
11
  results:
 
3084
 
3085
  ### Transformers.js
3086
 
3087
+ If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
3088
+ ```bash
3089
+ npm i @huggingface/transformers
 
 
 
 
 
 
 
 
 
3090
  ```
3091
 
3092
+ Then, you can compute embeddings as follows:
3093
 
3094
  ```javascript
3095
+ import { pipeline, matmul } from '@huggingface/transformers';
3096
 
3097
  // Create a feature extraction pipeline
3098
+ const extractor = await pipeline(
3099
+ "feature-extraction",
3100
+ "nomic-ai/modernbert-embed-base",
3101
+ { dtype: "fp32" }, // Supported options: "fp32", "fp16", "q8", "q4", "q4f16"
3102
+ );
3103
+
3104
+ // Embed queries and documents
3105
+ const query_embeddings = await extractor([
3106
+ "search_query: What is TSNE?",
3107
+ "search_query: Who is Laurens van der Maaten?",
3108
+ ], { pooling: "mean", normalize: true },
3109
+ );
3110
+ const doc_embeddings = await extractor([
3111
+ "search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten",
3112
+ ], { pooling: "mean", normalize: true },
3113
+ );
3114
 
3115
+ // Compute similarity scores
3116
+ const similarities = await matmul(query_embeddings, doc_embeddings.transpose(1, 0));
3117
+ console.log(similarities.tolist()); // [[0.721383273601532], [0.3259955644607544]]
3118
+ ```
3119
 
3120
 
3121
  ## Training
 
3153
  archivePrefix={arXiv},
3154
  primaryClass={cs.CL}
3155
  }
3156
+ ```