Add support for transformers.js (#6)
Browse files
- Add support for transformers.js (8eea22cb12cc5d205be48fc8120523d887f4abfe)
Co-authored-by: Joshua <[email protected]>
README.md
CHANGED
|
@@ -8,6 +8,7 @@ tags:
|
|
| 8 |
- mteb
|
| 9 |
- arctic
|
| 10 |
- snowflake-arctic-embed
|
|
|
|
| 11 |
model-index:
|
| 12 |
- name: snowflake-arctic-m-long
|
| 13 |
results:
|
|
@@ -3020,6 +3021,37 @@ If you use the long context model with more than 2048 tokens, ensure that you in
|
|
| 3020 |
model = AutoModel.from_pretrained('Snowflake/snowflake-arctic-embed-m-long', trust_remote_code=True, rotary_scaling_factor=2)
|
| 3021 |
```
|
| 3022 |
|
| … (unchanged lines collapsed in diff view) …
|
| 3023 |
|
| 3024 |
## FAQ
|
| 3025 |
|
|
|
|
| 8 |
- mteb
|
| 9 |
- arctic
|
| 10 |
- snowflake-arctic-embed
|
| 11 |
+
- transformers.js
|
| 12 |
model-index:
|
| 13 |
- name: snowflake-arctic-m-long
|
| 14 |
results:
|
|
|
|
| 3021 |
model = AutoModel.from_pretrained('Snowflake/snowflake-arctic-embed-m-long', trust_remote_code=True, rotary_scaling_factor=2)
|
| 3022 |
```
|
| 3023 |
|
| 3024 |
+
### Using Transformers.js
|
| 3025 |
+
|
| 3026 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) by running:
|
| 3027 |
+
```bash
|
| 3028 |
+
npm i @xenova/transformers
|
| 3029 |
+
```
|
| 3030 |
+
|
| 3031 |
+
You can then use the model to compute embeddings as follows:
|
| 3032 |
+
|
| 3033 |
+
```js
|
| 3034 |
+
import { pipeline, dot } from '@xenova/transformers';
|
| 3035 |
+
|
| 3036 |
+
// Create feature extraction pipeline
|
| 3037 |
+
const extractor = await pipeline('feature-extraction', 'Snowflake/snowflake-arctic-embed-m-long', {
|
| 3038 |
+
quantized: false, // Comment out this line to use the quantized version
|
| 3039 |
+
});
|
| 3040 |
+
|
| 3041 |
+
// Generate sentence embeddings
|
| 3042 |
+
const sentences = [
|
| 3043 |
+
'Represent this sentence for searching relevant passages: Where can I get the best tacos?',
|
| 3044 |
+
'The Data Cloud!',
|
| 3045 |
+
'Mexico City of Course!',
|
| 3046 |
+
]
|
| 3047 |
+
const output = await extractor(sentences, { normalize: true, pooling: 'cls' });
|
| 3048 |
+
|
| 3049 |
+
// Compute similarity scores
|
| 3050 |
+
const [source_embeddings, ...document_embeddings ] = output.tolist();
|
| 3051 |
+
const similarities = document_embeddings.map(x => dot(source_embeddings, x));
|
| 3052 |
+
console.log(similarities); // [0.36740492125676116, 0.42407774292046635]
|
| 3053 |
+
```
|
| 3054 |
+
|
| 3055 |
|
| 3056 |
## FAQ
|
| 3057 |
|