Merge branch 'main' of https://huggingface.co/ybelkada/flan-t5-large into main
- README.md +6 -11
- config.json +0 -29
README.md CHANGED

````diff
@@ -62,9 +62,7 @@ language:
 - no
 
 tags:
--
-- translation
-- text-generation
+- text2text-generation
 
 datasets:
 - svakulenk0/qrecc
@@ -101,7 +99,7 @@ license: apache-2.0
 
 # TL;DR
 
-If you already know T5, FLAN-T5 is just better at everything. For the same number of parameters, these models have been fine-tuned on more than 1000 additional tasks covering also more languages.
+If you already know T5, FLAN-T5 is just better at everything. For the same number of parameters, these models have been fine-tuned on more than 1000 additional tasks covering also more languages.
 As mentioned in the first few lines of the abstract :
 > Flan-PaLM 540B achieves state-of-the-art performance on several benchmarks, such as 75.2% on five-shot MMLU. We also publicly release Flan-T5 checkpoints,1 which achieve strong few-shot performance even compared to much larger models, such as PaLM 62B. Overall, instruction finetuning is a general method for improving the performance and usability of pretrained language models.
 
@@ -155,7 +153,7 @@ print(tokenizer.decode(outputs[0]))
 <summary> Click to expand </summary>
 
 ```python
-
+# pip install accelerate
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 
 tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
@@ -178,6 +176,7 @@ print(tokenizer.decode(outputs[0]))
 <summary> Click to expand </summary>
 
 ```python
+# pip install accelerate
 import torch
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 
@@ -199,7 +198,7 @@ print(tokenizer.decode(outputs[0]))
 <summary> Click to expand </summary>
 
 ```python
-# pip install bitsandbytes
+# pip install bitsandbytes accelerate
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 
 tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
@@ -308,8 +307,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 
 copyright = {Creative Commons Attribution 4.0 International}
 }
-```
-
-# Model Card Authors
-
-This model card was written by the team at Hugging Face.
+```
````
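For context on the snippet edits above: the `# pip install accelerate` and `# pip install bitsandbytes accelerate` comments belong to the model card's GPU and INT8 usage examples, which load the checkpoint with `device_map="auto"` (Accelerate) and optionally `load_in_8bit=True` (bitsandbytes). A minimal sketch of that pattern, assuming a CUDA GPU and recent `transformers`, `accelerate`, and `bitsandbytes` installs (not the exact model-card snippet), looks roughly like this:

```python
# Sketch of the GPU / INT8 usage pattern the added pip-install comments refer to.
# pip install transformers accelerate bitsandbytes
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")

# device_map="auto" requires accelerate; load_in_8bit=True additionally requires bitsandbytes.
model = T5ForConditionalGeneration.from_pretrained(
    "google/flan-t5-large",
    device_map="auto",
    load_in_8bit=True,
)

input_text = "translate English to German: How old are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")

outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```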
config.json CHANGED

````diff
@@ -23,35 +23,6 @@
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
-  "task_specific_params": {
-    "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
-      "max_length": 200,
-      "min_length": 30,
-      "no_repeat_ngram_size": 3,
-      "num_beams": 4,
-      "prefix": "summarize: "
-    },
-    "translation_en_to_de": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to German: "
-    },
-    "translation_en_to_fr": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to French: "
-    },
-    "translation_en_to_ro": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to Romanian: "
-    }
-  },
   "tie_word_embeddings": false,
   "transformers_version": "4.23.1",
   "use_cache": true,
````
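The block removed from `config.json` held legacy T5 `task_specific_params`: default generation settings plus task prefixes (`summarize: `, `translate English to German: `, and so on) that the original T5 pipelines applied implicitly. With the block gone, nothing is injected at generation time; the same behaviour can still be requested by passing the arguments to `generate` explicitly. A rough sketch, assuming the standard `transformers` generate API (the prompt text is only an illustration):

```python
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")

# Reproduce the removed "summarization" defaults by passing them explicitly.
text = "summarize: Studies have shown that owning a dog is good for you. Dogs keep people active and lower stress."
input_ids = tokenizer(text, return_tensors="pt").input_ids

outputs = model.generate(
    input_ids,
    num_beams=4,
    max_length=200,
    min_length=30,
    length_penalty=2.0,
    no_repeat_ngram_size=3,
    early_stopping=True,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```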