Integrate with Sentence Transformers v5.4

by tomaarsen HF Staff - opened Apr 16

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+99

-17

Files changed (6) hide show

1_LogitScore/config.json +5 -0
README.md +47 -17
chat_template.jinja +7 -0
config_sentence_transformers.json +11 -0
modules.json +14 -0
sentence_bert_config.json +15 -0

1_LogitScore/config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "true_token_id": 0,
+    "false_token_id": null,
+    "module_input_name": "causal_logits"
+}

README.md CHANGED Viewed

@@ -2,6 +2,10 @@
 library_name: transformers
 license: cc-by-nc-sa-4.0
 pipeline_tag: text-ranking
 ---
 <div align="center">
@@ -51,37 +55,56 @@ Use this reranker when you need to:
 ## Quickstart
-### Basic Usage
 ```python
-# Choose vLLM (recommended for production) or Transformers (simpler setup)
-# See full implementation in sections below
-model_path = "ContextualAI/reranker_v2_6b"
 query = "What are the health benefits of exercise?"
 instruction = "Prioritize recent medical research"
 documents = [
     "Regular exercise reduces risk of heart disease and improves mental health.",
     "A 2024 study shows exercise enhances cognitive function in older adults.",
-    "Ancient Greeks valued physical fitness for military training."
 ]
-# Using vLLM (see full code below):
-infer_w_vllm(model_path, query, instruction, documents)
-# OR using Transformers (see full code below):
-infer_w_hf(model_path, query, instruction, documents)
 ```
-**Expected Output:**
-```
-Query: What are the health benefits of exercise?
-Instruction: Prioritize recent medical research
-Score: -2.2969 | Doc: A 2024 study shows exercise enhances cognitive function in older adults.
-Score: -4.6875 | Doc: Regular exercise reduces risk of heart disease and improves mental health.
-Score: -12.3750 | Doc: Ancient Greeks valued physical fitness for military training.
-```
 ### vLLM Usage (Recommended for Production)
@@ -223,6 +246,13 @@ def infer_w_hf(model_path: str, query: str, instruction: str, documents: list[st
     print(f"Instruction: {instruction}")
     for score, doc_id, doc in results:
         print(f"Score: {score:.4f} | Doc: {doc}")
 ```
 ## Citation

 library_name: transformers
 license: cc-by-nc-sa-4.0
 pipeline_tag: text-ranking
+tags:
+  - sentence-transformers
+  - cross-encoder
+  - reranker
 ---
 <div align="center">
 ## Quickstart
+Each path below uses the same example inputs:
+```
+Query: What are the health benefits of exercise?
+Instruction: Prioritize recent medical research
+Documents:
+  - Regular exercise reduces risk of heart disease and improves mental health.
+  - A 2024 study shows exercise enhances cognitive function in older adults.
+  - Ancient Greeks valued physical fitness for military training.
+```
+**Expected Output** (exact scores can differ slightly between the paths below):
+```
+Score: -2.2969 | Doc: A 2024 study shows exercise enhances cognitive function in older adults.
+Score: -4.6875 | Doc: Regular exercise reduces risk of heart disease and improves mental health.
+Score: -12.3750 | Doc: Ancient Greeks valued physical fitness for military training.
+```
+### Using Sentence Transformers
+Install Sentence Transformers (this integration targets v5.4 or newer):
+```bash
+pip install sentence_transformers
+```
 ```python
+import torch
+from sentence_transformers import CrossEncoder
+model = CrossEncoder("ContextualAI/ctxl-rerank-v2-instruct-multilingual-6b", model_kwargs={"dtype": torch.bfloat16})
 query = "What are the health benefits of exercise?"
 instruction = "Prioritize recent medical research"
 documents = [
     "Regular exercise reduces risk of heart disease and improves mental health.",
     "A 2024 study shows exercise enhances cognitive function in older adults.",
+    "Ancient Greeks valued physical fitness for military training.",
 ]
+pairs = [(query, doc) for doc in documents]
+scores = model.predict(pairs, prompt=instruction)
+print(scores)
+# [ -4.6875   -2.171875 -12.4375  ]
+rankings = model.rank(query, documents, prompt=instruction)
+print(rankings)
+# [{'corpus_id': 1, 'score': np.float32(-2.171875)}, {'corpus_id': 0, 'score': np.float32(-4.6875)}, {'corpus_id': 2, 'score': np.float32(-12.4375)}]
 ```
+The `prompt` argument is optional; omit it to score pairs without a custom instruction. Scores are the raw bfloat16 logits at token id 0 at the final position (matching the `Transformers` path below), so higher means more relevant.
 ### vLLM Usage (Recommended for Production)
     print(f"Instruction: {instruction}")
     for score, doc_id, doc in results:
         print(f"Score: {score:.4f} | Doc: {doc}")
+    """
+    Query: What are the health benefits of exercise?
+    Instruction: Prioritize recent medical research
+    Score: -2.1719 | Doc: A 2024 study shows exercise enhances cognitive function in older adults.
+    Score: -4.6875 | Doc: Regular exercise reduces risk of heart disease and improves mental health.
+    Score: -12.4375 | Doc: Ancient Greeks valued physical fitness for military training.
+    """
 ```
 ## Citation

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,7 @@

+{%- set instruction_text = messages | selectattr("role", "eq", "system") | map(attribute="content") | first | default("") -%}
+{%- set query_text = messages | selectattr("role", "eq", "query") | map(attribute="content") | first -%}
+{%- set document_text = messages | selectattr("role", "eq", "document") | map(attribute="content") | first -%}
+{{- bos_token -}}
+Check whether a given document contains information helpful to answer the query.
+<Document> {{ document_text }}
+<Query> {{ query_text }}{% if instruction_text %} {{ instruction_text }}{% endif %} ??

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "__version__": {
+    "pytorch": "2.10.0+cu128",
+    "sentence_transformers": "5.4.0",
+    "transformers": "5.5.0.dev0"
+  },
+  "activation_fn": "torch.nn.modules.linear.Identity",
+  "default_prompt_name": null,
+  "model_type": "CrossEncoder",
+  "prompts": {}
+}

modules.json ADDED Viewed

	@@ -0,0 +1,14 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.base.modules.transformer.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_LogitScore",
+    "type": "sentence_transformers.cross_encoder.modules.logit_score.LogitScore"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "transformer_task": "text-generation",
+    "modality_config": {
+        "text": {
+            "method": "forward",
+            "method_output_name": "logits"
+        },
+        "message": {
+            "method": "forward",
+            "method_output_name": "logits",
+            "format": "flat"
+        }
+    },
+    "module_output_name": "causal_logits"
+}