Add new SentenceTransformer model
- 1_Pooling/config.json +10 -0
- README.md +461 -0
- config.json +24 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
{
  "word_embedding_dimension": 384,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
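This config enables only `pooling_mode_mean_tokens`, i.e. attention-mask-weighted mean pooling over the encoder's token states. For reference, a minimal sketch of reproducing that pooling directly with `transformers` (base model name taken from the README; the input sentence is one of the widget examples):

```python
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
model = AutoModel.from_pretrained("thenlper/gte-small")

batch = tokenizer(["Statutes are often called ________ law."],
                  padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
    token_states = model(**batch).last_hidden_state  # [batch, seq_len, 384]

# Mean pooling: average token states, counting only non-padding positions.
mask = batch["attention_mask"].unsqueeze(-1).float()
embedding = (token_states * mask).sum(dim=1) / mask.sum(dim=1)
print(embedding.shape)  # torch.Size([1, 384])
```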
README.md
ADDED
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:5700
- loss:TripletLoss
base_model: thenlper/gte-small
widget:
- source_sentence: Statutes are often called ________ law.
  sentences:
  - Calculate spin density on the central carbon atom of malonic acid radical (•CH(COOH)2)
    if the hyperfine value for the α-hydrogen atom is 21.9 G.
  - Which of the following quotations best describes the central thesis of difference
    feminism?
  - 'If a relevant variable is omitted from a regression equation, the consequences
    would be that:


    i) The standard errors would be biased



    ii) If the excluded variable is uncorrelated with all of the included variables,
    all of


    the slope coefficients will be inconsistent.



    iii) If the excluded variable is uncorrelated with all of the included variables,
    the


    intercept coefficient will be inconsistent.



    iv) If the excluded variable is uncorrelated with all of the included variables,
    all of


    the slope and intercept coefficients will be consistent and unbiased but inefficient.'
- source_sentence: Let M be a 5 x 5 real matrix. Exactly four of the following five
    conditions on M are equivalent to each other. Which of the five conditions is
    equivalent to NONE of the other four?
  sentences:
  - 'The royal graves of the Shang Dynasty consisted of enormous cruciform-shaped
    tombs, where the deceased kings were buried with:'
  - The region bounded by the curves y = x and y = x^2 in the first quadrant of the
    xy-plane is rotated about the y-axis. The volume of the resulting solid of revolution
    is
  - 'The energy released from the breakdown of the high-energy phosphates, ATP and
    phosphocreatine, can sustain maximal exertion exercise for about:'
- source_sentence: Which sequence describes the systemic circulation?
  sentences:
  - Which of the following best describes the process whereby the stomach muscles
    contract to propel food through the digestive tract?
  - The fallacy of guilt by association is a specific type of
  - 'Baier argues that genuine moral rules:'
- source_sentence: 'This question refers to the following information.

    Although in Protestant Europe, [Peter the Great] was surrounded by evidence of
    the new civil and political rights of individual men embodied in constitutions,
    bills of rights and parliaments, he did not return to Russia determined to share
    power with his people. On the contrary, he returned not only determined to change
    his country but also convinced that if Russia was to be transformed, it was he
    who must provide both the direction and the motive force. He would try to lead;
    but where education and persuasion were not enough, he could drive—and if necessary
    flog—the backward nation forward.

    —Robert K. Massie, Peter the Great: His Life and World

    Based on the above passage, what kinds of reforms did Peter the Great embrace?'
  sentences:
  - 'Identify the antecedent of the following conditional proposition: When the university
    raises tuition, then either the governor approves of it or the board of trustees
    doesn''t prevent it.'
  - Which of the following disorders is not suitable for population carrier screening?
  - 'This question refers to the following information.

    "To slacken the tempo would mean falling behind. And those who fall behind get
    beaten. But we do not want to be beaten. No, we refuse to be beaten! One feature
    of the history of old Russia was the continual beatings she suffered because of
    her backwardness. She was beaten by the Mongol khans. She was beaten by the Turkish
    beys. She was beaten by the Swedish feudal lords. She was beaten by the Polish
    and Lithuanian gentry. She was beaten by the British and French capitalists. She
    was beaten by the Japanese barons. All beat her––because of her backwardness,
    because of her military backwardness, cultural backwardness, political backwardness,
    industrial backwardness, agricultural backwardness. They beat her because it was
    profitable and could be done with impunity. You remember the words of the pre-revolutionary
    poet: "You are poor and abundant, mighty and impotent, Mother Russia." Those gentlemen
    were quite familiar with the verses of the old poet. They beat her, saying: "You
    are abundant," so one can enrich oneself at your expense. They beat her, saying:
    "You are poor and impotent," so you can be beaten and plundered with impunity.
    Such is the law of the exploiters––to beat the backward and the weak. It is the
    jungle law of capitalism. You are backward, you are weak––therefore you are wrong;
    hence you can be beaten and enslaved. You are mighty––therefore you are right;
    hence we must be wary of you.

    That is why we must no longer lag behind."

    Joseph Stalin, speech delivered at the first All-Union Conference of Leading Personnel
    of Socialist Industry, February 4, 1931

    Stalin''s efforts to advance Russia as justified by his mention of the "continual
    beatings" were vindicated by which of the following historical events?'
- source_sentence: Gulde’s tax basis in Chyme Partnership was $26,000 at the time
    Gulde received a liquidating distribution of $12,000 cash and land with an adjusted
    basis to Chyme of $10,000 and a fair market value of $30,000. Chyme did not have
    unrealized receivables, appreciated inventory, or properties that had been contributed
    by its partners. What was the amount of Gulde’s basis in the land?
  sentences:
  - What is direct diplomacy?
  - The percentage of children in Ethiopia (age 8) who reported physical punishment
    by teachers in the past week in 2009 was about what?
  - A company exchanged land with an appraised value of $50,000 and an original cost
    of $20,000 for machinery with a fair value of $55,000. Assuming that the transaction
    has commercial substance, what is the gain on the exchange?
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on thenlper/gte-small

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [thenlper/gte-small](https://huggingface.co/thenlper/gte-small). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [thenlper/gte-small](https://huggingface.co/thenlper/gte-small) <!-- at revision 17e1f347d17fe144873b1201da91788898c639cd -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("Alexhuou/embedder_model")
# Run inference
sentences = [
    'Gulde’s tax basis in Chyme Partnership was $26,000 at the time Gulde received a liquidating distribution of $12,000 cash and land with an adjusted basis to Chyme of $10,000 and a fair market value of $30,000. Chyme did not have unrealized receivables, appreciated inventory, or properties that had been contributed by its partners. What was the amount of Gulde’s basis in the land?',
    'A company exchanged land with an appraised value of $50,000 and an original cost of $20,000 for machinery with a fair value of $55,000. Assuming that the transaction has commercial substance, what is the gain on the exchange?',
    'The percentage of children in Ethiopia (age 8) who reported physical punishment by teachers in the past week in 2009 was about what?',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
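Beyond pairwise similarity, a common follow-on use is semantic search over a small corpus. A minimal sketch using the `util.semantic_search` helper from the same package (the corpus entries are widget examples from this card; the query string is illustrative):

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("Alexhuou/embedder_model")

corpus = [
    "Which sequence describes the systemic circulation?",
    "What is direct diplomacy?",
    "Statutes are often called ________ law.",
]
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
query_embedding = model.encode("How does blood circulate through the body?",
                               convert_to_tensor=True)

# Rank corpus entries against the query by cosine similarity.
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=2)
for hit in hits[0]:
    print(f"{hit['score']:.3f}  {corpus[hit['corpus_id']]}")
```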

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 5,700 training samples
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence_0 | sentence_1 | sentence_2 |
  |:--------|:-----------|:-----------|:-----------|
  | type    | string     | string     | string     |
  | details | <ul><li>min: 5 tokens</li><li>mean: 49.22 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 48.59 tokens</li><li>max: 440 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 41.92 tokens</li><li>max: 512 tokens</li></ul> |
* Samples:
  | sentence_0 | sentence_1 | sentence_2 |
  |:-----------|:-----------|:-----------|
  | <code>This question refers to the following information.<br>"The spontaneous forces of capitalism have been steadily growing in the countryside in recent years, with new rich peasants springing up everywhere and many well-to-do middle peasants striving to become rich peasants. On the other hand, many poor peasants are still living in poverty for lack of sufficient means of production, with some in debt and others selling or renting out their land. If this tendency goes unchecked, the polarization in the countryside will inevitably be aggravated day by day. Those peasants who lose their land and those who remain in poverty will complain that we are doing nothing to save them from ruin or to help them overcome their difficulties. Nor will the well-to-do middle peasants who are heading in the capitalist direction be pleased with us, for we shall never be able to satisfy their demands unless we intend to take the capitalist road. Can the worker-peasant alliance continue to stand in these circumstan...</code> | <code>This question refers to the following information.<br>Woman, wake up; the bell of reason is being heard throughout the whole universe; discover your rights. Enslaved man has multiplied his strength, [but] having become free, he has become unjust to his companion. Oh, women, women! When will you cease to be blind? What advantage have you received from the Revolution? A more pronounced scorn, a more marked disdain. If our leaders persist, courageously oppose the force of reason to their empty pretentions of superiority. Regardless of what barriers confront you, it is in your power to free yourselves!<br>Olympe de Gouges, "Declaration of the Rights of Woman and the Female Citizen," 1791<br>The independence? Nothing of what I hoped for was achieved. I had expected that my children would be able to have an education, but they did not get it. We were poor peasants then, we are poor peasants now. Nothing has changed. Everything is the same. The only thing is that we are free, the war is over, we work ...</code> | <code>Which of the following most likely explains why Venus does not have a strong magnetic field?</code> |
  | <code> In conducting international market research, there are three types of equivalence. Which of the following is NOT one of the equivalences?</code> | <code> Economic—marketing should encourage long-term economic development as opposed to short-term economic development.</code> | <code>The domain of the function $h(x) = \sqrt{25-x^2}+\sqrt{-(x-2)}$ is an interval of what width?</code> |
  | <code>Which value is the most reasonable estimate of the volume of air an adult breathes in one day?</code> | <code>By what nickname is the Federal National Mortgage Association known?</code> | <code>If technology makes production less expensive and at the same time exports decrease which of the following will result with certainty?</code> |
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
  ```json
  {
      "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
      "triplet_margin": 5
  }
  ```
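For readers who want to run a comparable fine-tune, the loss configuration above maps onto the sentence-transformers training API roughly as follows. This is a sketch under stated assumptions, not the exact training script: the triplet row shown is illustrative, and the actual run used the hyperparameters listed below (e.g. batch size 16, 30 epochs).

```python
from datasets import Dataset
from sentence_transformers import (SentenceTransformer,
                                   SentenceTransformerTrainer, losses)

model = SentenceTransformer("thenlper/gte-small")

# The real dataset has 5,700 rows with columns sentence_0 (anchor),
# sentence_1 (positive), sentence_2 (negative); this single row is made up.
train_dataset = Dataset.from_dict({
    "sentence_0": ["Which sequence describes the systemic circulation?"],
    "sentence_1": ["Which path does blood take from the heart to the body?"],
    "sentence_2": ["Baier argues that genuine moral rules:"],
})

# Matches the parameters above: Euclidean distance, margin 5.
loss = losses.TripletLoss(
    model,
    distance_metric=losses.TripletDistanceMetric.EUCLIDEAN,
    triplet_margin=5,
)

trainer = SentenceTransformerTrainer(model=model,
                                     train_dataset=train_dataset,
                                     loss=loss)
trainer.train()
```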

### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `num_train_epochs`: 30
- `multi_dataset_batch_sampler`: round_robin

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1
- `num_train_epochs`: 30
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: round_robin

</details>

### Training Logs
| Epoch   | Step  | Training Loss |
|:-------:|:-----:|:-------------:|
| 1.4006  | 500   | 1.7342        |
| 2.8011  | 1000  | 0.8812        |
| 1.4006  | 500   | 0.5667        |
| 2.8011  | 1000  | 0.3886        |
| 4.2017  | 1500  | 0.2434        |
| 5.6022  | 2000  | 0.1532        |
| 7.0028  | 2500  | 0.1159        |
| 8.4034  | 3000  | 0.079         |
| 9.8039  | 3500  | 0.0524        |
| 11.2045 | 4000  | 0.0442        |
| 12.6050 | 4500  | 0.03          |
| 14.0056 | 5000  | 0.0246        |
| 15.4062 | 5500  | 0.0196        |
| 16.8067 | 6000  | 0.0137        |
| 18.2073 | 6500  | 0.0161        |
| 19.6078 | 7000  | 0.0093        |
| 21.0084 | 7500  | 0.0109        |
| 22.4090 | 8000  | 0.0055        |
| 23.8095 | 8500  | 0.0047        |
| 25.2101 | 9000  | 0.0044        |
| 26.6106 | 9500  | 0.0033        |
| 28.0112 | 10000 | 0.0043        |
| 29.4118 | 10500 | 0.0027        |

### Framework Versions
- Python: 3.11.13
- Sentence Transformers: 4.1.0
- Transformers: 4.52.4
- PyTorch: 2.6.0+cu124
- Accelerate: 1.7.0
- Datasets: 3.6.0
- Tokenizers: 0.21.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### TripletLoss
```bibtex
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification},
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
{
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.52.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
config_sentence_transformers.json
ADDED
{
  "__version__": {
    "sentence_transformers": "4.1.0",
    "transformers": "4.52.4",
    "pytorch": "2.6.0+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
model.safetensors
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:6471aea9fa834c04be86c840f1e0ecee8e75f37e5bf98a6071cddeb002a09a89
size 133462128
modules.json
ADDED
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]
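modules.json declares the two-stage pipeline that `SentenceTransformer` assembles on load: module 0 is the transformer encoder (stored at the repository root) and module 1 is the pooling layer (the `1_Pooling` directory). A minimal sketch of building the same pipeline explicitly, assuming the base model named in the README:

```python
from sentence_transformers import SentenceTransformer, models

# Module 0: the BERT-style encoder, truncating inputs at 512 tokens.
word_embedding_model = models.Transformer("thenlper/gte-small", max_seq_length=512)
# Module 1: mean pooling over the 384-dimensional token embeddings.
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode="mean",
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
```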
sentence_bert_config.json
ADDED
{
  "max_seq_length": 512,
  "do_lower_case": false
}
special_tokens_map.json
ADDED
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "max_length": 128,
  "model_max_length": 1000000000000000019884624838656,
  "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.