Improve Transformers.js code snippet (#6)
Browse files
- Improve Transformers.js code snippet (a6809227ef568c6f8a14a2226ba18e4fa42c776f)
- Upload README.md (0bab91d33ed6f45be1e858d2b0a15e50478cccd4)
Co-authored-by: Joshua <[email protected]>
README.md
CHANGED
@@ -5,6 +5,7 @@ tags:
|
|
5 |
- feature-extraction
|
6 |
- sentence-similarity
|
7 |
- mteb
|
|
|
8 |
model-index:
|
9 |
- name: binarize_False
|
10 |
results:
|
@@ -3083,38 +3084,38 @@ Note the small differences compared to the full 768-dimensional similarities.
|
|
3083 |
|
3084 |
### Transformers.js
|
3085 |
|
3086 |
-
|
3087 |
-
|
3088 |
-
|
3089 |
-
// Create a feature extraction pipeline
|
3090 |
-
const extractor = await pipeline('feature-extraction', 'nomic-ai/modernbert-embed-base', {
|
3091 |
-
quantized: false, // Comment out this line to use the quantized version
|
3092 |
-
});
|
3093 |
-
|
3094 |
-
// Compute sentence embeddings
|
3095 |
-
const texts = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'];
|
3096 |
-
const embeddings = await extractor(texts, { pooling: 'mean', normalize: true });
|
3097 |
-
console.log(embeddings);
|
3098 |
```
|
3099 |
|
3100 |
-
|
3101 |
|
3102 |
```javascript
|
3103 |
-
import { pipeline } from '@huggingface/transformers';
|
3104 |
|
3105 |
// Create a feature extraction pipeline
|
3106 |
-
const extractor = await pipeline(
|
3107 |
-
|
3108 |
-
|
3109 |
-
|
3110 |
-
|
3111 |
-
|
3112 |
-
|
3113 |
-
|
3114 |
-
|
3115 |
-
|
3116 |
-
|
|
|
|
|
|
|
|
|
|
|
3117 |
|
|
|
|
|
|
|
|
|
3118 |
|
3119 |
|
3120 |
## Training
|
@@ -3152,4 +3153,4 @@ If you find the model, dataset, or training code useful, please cite our work
|
|
3152 |
archivePrefix={arXiv},
|
3153 |
primaryClass={cs.CL}
|
3154 |
}
|
3155 |
-
```
|
|
|
5 |
- feature-extraction
|
6 |
- sentence-similarity
|
7 |
- mteb
|
8 |
+
- transformers.js
|
9 |
model-index:
|
10 |
- name: binarize_False
|
11 |
results:
|
|
|
3084 |
|
3085 |
### Transformers.js
|
3086 |
|
3087 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
|
3088 |
+
```bash
|
3089 |
+
npm i @huggingface/transformers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3090 |
```
|
3091 |
|
3092 |
+
Then, you can compute embeddings as follows:
|
3093 |
|
3094 |
```javascript
|
3095 |
+
import { pipeline, matmul } from '@huggingface/transformers';
|
3096 |
|
3097 |
// Create a feature extraction pipeline
|
3098 |
+
const extractor = await pipeline(
|
3099 |
+
"feature-extraction",
|
3100 |
+
"nomic-ai/modernbert-embed-base",
|
3101 |
+
{ dtype: "fp32" }, // Supported options: "fp32", "fp16", "q8", "q4", "q4f16"
|
3102 |
+
);
|
3103 |
+
|
3104 |
+
// Embed queries and documents
|
3105 |
+
const query_embeddings = await extractor([
|
3106 |
+
"search_query: What is TSNE?",
|
3107 |
+
"search_query: Who is Laurens van der Maaten?",
|
3108 |
+
], { pooling: "mean", normalize: true },
|
3109 |
+
);
|
3110 |
+
const doc_embeddings = await extractor([
|
3111 |
+
"search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten",
|
3112 |
+
], { pooling: "mean", normalize: true },
|
3113 |
+
);
|
3114 |
|
3115 |
+
// Compute similarity scores
|
3116 |
+
const similarities = await matmul(query_embeddings, doc_embeddings.transpose(1, 0));
|
3117 |
+
console.log(similarities.tolist()); // [[0.721383273601532], [0.3259955644607544]]
|
3118 |
+
```
|
3119 |
|
3120 |
|
3121 |
## Training
|
|
|
3153 |
archivePrefix={arXiv},
|
3154 |
primaryClass={cs.CL}
|
3155 |
}
|
3156 |
+
```
|