Commit
·
27c33d2
1
Parent(s):
ebd4ed4
Update all configs, add both fast and legacy tokenizers, and add a TensorFlow checkpoint for compatibility
Browse files
- config.json +5 -0
- pytorch_model.bin +2 -2
- special_tokens_map.json +7 -1
- tf_model.h5 +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +13 -1
- vocab.txt +36 -36
config.json
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
{
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"BertForMaskedLM"
|
| 4 |
],
|
| 5 |
"attention_probs_dropout_prob": 0.1,
|
|
|
|
| 6 |
"hidden_act": "gelu",
|
| 7 |
"hidden_dropout_prob": 0.1,
|
| 8 |
"hidden_size": 768,
|
|
@@ -15,6 +17,9 @@
|
|
| 15 |
"num_hidden_layers": 12,
|
| 16 |
"output_past": true,
|
| 17 |
"pad_token_id": 1,
|
|
|
|
|
|
|
| 18 |
"type_vocab_size": 2,
|
|
|
|
| 19 |
"vocab_size": 31002
|
| 20 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "dccuchile/bert-base-spanish-wwm-uncased",
|
| 3 |
"architectures": [
|
| 4 |
"BertForMaskedLM"
|
| 5 |
],
|
| 6 |
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"gradient_checkpointing": false,
|
| 8 |
"hidden_act": "gelu",
|
| 9 |
"hidden_dropout_prob": 0.1,
|
| 10 |
"hidden_size": 768,
|
|
|
|
| 17 |
"num_hidden_layers": 12,
|
| 18 |
"output_past": true,
|
| 19 |
"pad_token_id": 1,
|
| 20 |
+
"position_embedding_type": "absolute",
|
| 21 |
+
"transformers_version": "4.4.2",
|
| 22 |
"type_vocab_size": 2,
|
| 23 |
+
"use_cache": true,
|
| 24 |
"vocab_size": 31002
|
| 25 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5480283d2ac26ac36df538fa5c12412b89ff176db693d00e71735200d9e0e99b
|
| 3 |
+
size 439621341
|
special_tokens_map.json
CHANGED
|
@@ -1 +1,7 @@
|
|
| 1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"unk_token": "[UNK]",
|
| 3 |
+
"sep_token": "[SEP]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"cls_token": "[CLS]",
|
| 6 |
+
"mask_token": "[MASK]"
|
| 7 |
+
}
|
tf_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07e82361c425346a2d05b8f7ed638388e1f617416e2d9763224ff96ca0914e18
|
| 3 |
+
size 536635336
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
CHANGED
|
@@ -1 +1,13 @@
|
|
| 1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_lower_case": true,
|
| 3 |
+
"unk_token": "[UNK]",
|
| 4 |
+
"sep_token": "[SEP]",
|
| 5 |
+
"pad_token": "[PAD]",
|
| 6 |
+
"cls_token": "[CLS]",
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"tokenize_chinese_chars": true,
|
| 9 |
+
"strip_accents": false,
|
| 10 |
+
"do_basic_tokenize": true,
|
| 11 |
+
"never_split": null,
|
| 12 |
+
"model_max_length": 512
|
| 13 |
+
}
|
vocab.txt
CHANGED
|
@@ -956,42 +956,42 @@
|
|
| 956 |
[unused949]
|
| 957 |
[unused950]
|
| 958 |
[unused951]
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
|
| 970 |
-
|
| 971 |
-
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
[
|
| 978 |
-
[
|
| 979 |
-
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
|
| 991 |
-
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
|
| 995 |
##:
|
| 996 |
:
|
| 997 |
##8
|
|
|
|
| 956 |
[unused949]
|
| 957 |
[unused950]
|
| 958 |
[unused951]
|
| 959 |
+
##|
|
| 960 |
+
|
|
| 961 |
+
##}
|
| 962 |
+
}
|
| 963 |
+
##{
|
| 964 |
+
{
|
| 965 |
+
##_
|
| 966 |
+
_
|
| 967 |
+
##+
|
| 968 |
+
+
|
| 969 |
+
##*
|
| 970 |
+
*
|
| 971 |
+
##&
|
| 972 |
+
&
|
| 973 |
+
##$
|
| 974 |
+
$
|
| 975 |
+
##]
|
| 976 |
+
]
|
| 977 |
+
##[
|
| 978 |
+
[
|
| 979 |
+
##=
|
| 980 |
+
=
|
| 981 |
+
##>
|
| 982 |
+
>
|
| 983 |
+
##<
|
| 984 |
+
<
|
| 985 |
+
##@
|
| 986 |
+
@
|
| 987 |
+
##\
|
| 988 |
+
\
|
| 989 |
+
##/
|
| 990 |
+
/
|
| 991 |
+
##%
|
| 992 |
+
%
|
| 993 |
+
##;
|
| 994 |
+
;
|
| 995 |
##:
|
| 996 |
:
|
| 997 |
##8
|