zl369 committed
Commit 247451c · verified · 1 Parent(s): 879533b

Upload folder using huggingface_hub

Files changed (4)
  1. config.json +27 -0
  2. log.txt +296 -0
  3. step_74124_pytorch_model.bin +3 -0
  4. vocab.txt +0 -0
config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "BertForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "directionality": "bidi",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 32,
+   "initializer_range": 0.02,
+   "intermediate_size": 128,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 4,
+   "num_hidden_layers": 4,
+   "pad_token_id": 0,
+   "pooler_fc_size": 32,
+   "pooler_num_attention_heads": 4,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 8,
+   "pooler_type": "first_token_transform",
+   "pre_trained": "",
+   "training": "",
+   "type_vocab_size": 2,
+   "vocab_size": 119547
+ }
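
The config above describes a very small BERT encoder: 4 layers, 4 attention heads, hidden size 32, intermediate size 128, with the 119,547-token vocabulary used by bert-base-multilingual-cased. As a rough illustration only (the local file name is an assumption based on this commit), a minimal sketch of instantiating it with the Hugging Face transformers API:

from transformers import BertConfig, BertForMaskedLM

# Build the tiny masked-LM BERT described by config.json above.
config = BertConfig.from_json_file("config.json")
model = BertForMaskedLM(config)

# Most parameters sit in the 119547 x 32 token-embedding table.
print(sum(p.numel() for p in model.parameters()))

At these sizes the model comes to roughly 4M parameters, which is consistent with the ~16 MB fp32 checkpoint uploaded below.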
log.txt ADDED
@@ -0,0 +1,296 @@
+ att_loss = 165.9651178506998
+ global_step = 999
+ loss = 44.97073807873883
+ rep_loss = 13.917834387407884
+ att_loss = 152.55476939385983
+ global_step = 1999
+ loss = 41.532161897751855
+ rep_loss = 13.573878260956459
+ att_loss = 138.32189903246558
+ global_step = 2999
+ loss = 37.904360088120704
+ rep_loss = 13.295541353645465
+ att_loss = 129.30404715503445
+ global_step = 3999
+ loss = 35.58956172341673
+ rep_loss = 13.054199772436757
+ att_loss = 119.63957041000027
+ global_step = 4999
+ loss = 33.12675918429154
+ rep_loss = 12.86746633794456
+ att_loss = 109.77276303685095
+ global_step = 5999
+ loss = 30.609275317982963
+ rep_loss = 12.664338246328192
+ att_loss = 102.02662080682198
+ global_step = 6999
+ loss = 28.61658605365587
+ rep_loss = 12.439723421563624
+ att_loss = 95.12835736232395
+ global_step = 7999
+ loss = 26.83650435163224
+ rep_loss = 12.217660057647539
+ att_loss = 89.21587809017862
+ global_step = 8999
+ loss = 25.304571141639222
+ rep_loss = 12.002406489413266
+ att_loss = 84.34852850204683
+ global_step = 9999
+ loss = 24.03592947626462
+ rep_loss = 11.79518941626905
+ att_loss = 80.04609476799249
+ global_step = 10999
+ loss = 22.912895704271662
+ rep_loss = 11.60548806140635
+ att_loss = 76.0886288830137
+ global_step = 11999
+ loss = 21.87929865471253
+ rep_loss = 11.428565748672364
+ att_loss = 72.60697973719999
+ global_step = 12999
+ loss = 20.966439453452647
+ rep_loss = 11.258778085341058
+ att_loss = 69.5490453120802
+ global_step = 13999
+ loss = 20.161243635634456
+ rep_loss = 11.095929238888031
+ att_loss = 66.863694736835
+ global_step = 14999
+ loss = 19.451515919177147
+ rep_loss = 10.942368946756408
+ att_loss = 64.48475605953156
+ global_step = 15999
+ loss = 18.82124200939603
+ rep_loss = 10.800211985354588
+ att_loss = 62.36240669536383
+ global_step = 16999
+ loss = 18.25807497858882
+ rep_loss = 10.669893227154258
+ att_loss = 60.45600431909481
+ global_step = 17999
+ loss = 17.75172053029043
+ rep_loss = 10.550877809140395
+ att_loss = 58.73684728205207
+ global_step = 18999
+ loss = 17.294778685248758
+ rep_loss = 10.442267465330412
+ att_loss = 57.178288419006215
+ global_step = 19999
+ loss = 16.880314805327764
+ rep_loss = 10.342970809934616
+ att_loss = 55.75939049489828
+ global_step = 20999
+ loss = 16.50286048242856
+ rep_loss = 10.252051443081537
+ att_loss = 54.462385941951986
+ global_step = 21999
+ loss = 16.15771857637336
+ rep_loss = 10.168488371691392
+ att_loss = 53.27148981758644
+ global_step = 22999
+ loss = 15.840749783227535
+ rep_loss = 10.091509322725361
+ att_loss = 52.1748089551121
+ global_step = 23999
+ loss = 15.548789579672905
+ rep_loss = 10.02034937130858
+ att_loss = 26.78962243135852
+ global_step = 24999
+ loss = 8.789205611776241
+ rep_loss = 8.367200058350448
+ att_loss = 26.729843077966173
+ global_step = 25999
+ loss = 8.772781761001008
+ rep_loss = 8.361283999649155
+ att_loss = 26.682301047169865
+ global_step = 26999
+ loss = 8.75986536118413
+ rep_loss = 8.357160423999789
+ att_loss = 26.637808661227602
+ global_step = 27999
+ loss = 8.747724119054261
+ rep_loss = 8.35308783723025
+ att_loss = 26.595655785215897
+ global_step = 28999
+ loss = 8.736268049074313
+ rep_loss = 8.34941643002816
+ att_loss = 26.553387359643654
+ global_step = 29999
+ loss = 8.724733944178935
+ rep_loss = 8.345548433609528
+ att_loss = 26.511934938841605
+ global_step = 30999
+ loss = 8.713468565658134
+ rep_loss = 8.341939335615224
+ att_loss = 26.47209676782579
+ global_step = 31999
+ loss = 8.70264744013037
+ rep_loss = 8.33849300280011
+ att_loss = 26.435741815876433
+ global_step = 32999
+ loss = 8.6927635342749
+ rep_loss = 8.33531233269693
+ att_loss = 26.40115444237546
+ global_step = 33999
+ loss = 8.683378330563693
+ rep_loss = 8.33235888937397
+ att_loss = 26.368730268931554
+ global_step = 34999
+ loss = 8.674565872174405
+ rep_loss = 8.329533228384442
+ att_loss = 26.33715835482096
+ global_step = 35999
+ loss = 8.666019251111992
+ rep_loss = 8.326918655792824
+ att_loss = 26.30735234986891
+ global_step = 36999
+ loss = 8.657943420050909
+ rep_loss = 8.324421335688523
+ att_loss = 26.278938743063776
+ global_step = 37999
+ loss = 8.650236012766838
+ rep_loss = 8.322005312864869
+ att_loss = 26.25172895231054
+ global_step = 38999
+ loss = 8.642865562378905
+ rep_loss = 8.319733299157008
+ att_loss = 26.22554387304705
+ global_step = 39999
+ loss = 8.63577436198668
+ rep_loss = 8.317553578813285
+ att_loss = 26.201288267405722
+ global_step = 40999
+ loss = 8.629222177960115
+ rep_loss = 8.315600447361732
+ att_loss = 26.177384423765602
+ global_step = 41999
+ loss = 8.62276040119899
+ rep_loss = 8.313657183595035
+ att_loss = 26.1553768101701
+ global_step = 42999
+ loss = 8.616789896070687
+ rep_loss = 8.311782776810839
+ att_loss = 26.134055339334097
+ global_step = 43999
+ loss = 8.611018220643333
+ rep_loss = 8.31001754608182
+ att_loss = 26.11391984937601
+ global_step = 44999
+ loss = 8.605568707086226
+ rep_loss = 8.308354981177883
+ att_loss = 26.094756877527036
+ global_step = 45999
+ loss = 8.600382417517402
+ rep_loss = 8.306772793449616
+ att_loss = 26.075705275818592
+ global_step = 46999
+ loss = 8.595226314131908
+ rep_loss = 8.30519998013147
+ att_loss = 26.057315672295104
+ global_step = 47999
+ loss = 8.590256583761787
+ rep_loss = 8.303710662782752
+ att_loss = 26.039835258400025
+ global_step = 48999
+ loss = 8.58552456486098
+ rep_loss = 8.302263001250012
+ att_loss = 25.612119941318834
+ global_step = 49999
+ loss = 8.470151519284649
+ rep_loss = 8.268486091244078
+ att_loss = 25.60738331776063
+ global_step = 50999
+ loss = 8.468582282244068
+ rep_loss = 8.266945785009176
+ att_loss = 25.596803975668333
+ global_step = 51999
+ loss = 8.465786078342669
+ rep_loss = 8.266340331887257
+ att_loss = 25.5892900415815
+ global_step = 52999
+ loss = 8.463734707418448
+ rep_loss = 8.265648783767087
+ att_loss = 25.57996551027575
+ global_step = 53999
+ loss = 8.461273999810349
+ rep_loss = 8.265130484283633
+ att_loss = 25.571993088094473
+ global_step = 54999
+ loss = 8.459134756525891
+ rep_loss = 8.26454593587387
+ att_loss = 25.56450918759145
+ global_step = 55999
+ loss = 8.4571359498005
+ rep_loss = 8.264034611357031
+ att_loss = 25.55813332545982
+ global_step = 56999
+ loss = 8.455404902957099
+ rep_loss = 8.263486286337137
+ att_loss = 25.551806679003086
+ global_step = 57999
+ loss = 8.453719509657649
+ rep_loss = 8.263071358877504
+ att_loss = 25.544399506585727
+ global_step = 58999
+ loss = 8.451735918449483
+ rep_loss = 8.262544165122339
+ att_loss = 25.539079044758008
+ global_step = 59999
+ loss = 8.450312722059321
+ rep_loss = 8.262171842803422
+ att_loss = 25.533065229481924
+ global_step = 60999
+ loss = 8.448696507862541
+ rep_loss = 8.261720797789794
+ att_loss = 25.5272183784343
+ global_step = 61999
+ loss = 8.447124415680117
+ rep_loss = 8.261279278620812
+ att_loss = 25.52183096024264
+ global_step = 62999
+ loss = 8.445677864036371
+ rep_loss = 8.26088049319973
+ att_loss = 25.516231528919583
+ global_step = 63999
+ loss = 8.444169045639065
+ rep_loss = 8.260444652900965
+ att_loss = 25.510988567145194
+ global_step = 64999
+ loss = 8.442773997412639
+ rep_loss = 8.26010742366815
+ att_loss = 25.506380913765547
+ global_step = 65999
+ loss = 8.441536241767274
+ rep_loss = 8.259764054583128
+ att_loss = 25.501700612429243
+ global_step = 66999
+ loss = 8.440280174074106
+ rep_loss = 8.259420086579105
+ att_loss = 25.498043594566656
+ global_step = 67999
+ loss = 8.439289064547964
+ rep_loss = 8.259112667204752
+ att_loss = 25.494367892728885
+ global_step = 68999
+ loss = 8.43829905562495
+ rep_loss = 8.258828334251232
+ att_loss = 25.490245821107287
+ global_step = 69999
+ loss = 8.4371964769438
+ rep_loss = 8.258540091776137
+ att_loss = 25.487574132507405
+ global_step = 70999
+ loss = 8.436473700620542
+ rep_loss = 8.258320674128282
+ att_loss = 25.483769633130947
+ global_step = 71999
+ loss = 8.435455289505901
+ rep_loss = 8.258051528925597
+ att_loss = 25.48069784268519
+ global_step = 72999
+ loss = 8.434621894379303
+ rep_loss = 8.257789739057904
+ att_loss = 25.47792583601433
+ global_step = 73999
+ loss = 8.433877880422072
+ rep_loss = 8.257585689757036
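
log.txt reports three metrics every 1,000 optimizer steps. The att_loss/rep_loss naming is consistent with TinyBERT-style distillation (MSE on attention maps and on hidden representations), though the training script is not part of this commit. A small sketch for turning the log into structured records, assuming the four key = value lines always repeat in the order shown above:

import re

records, current = [], {}
with open("log.txt") as f:
    for line in f:
        m = re.match(r"(\w+) = ([\d.eE+-]+)", line.strip())
        if not m:
            continue
        key, value = m.group(1), m.group(2)
        current[key] = int(value) if key == "global_step" else float(value)
        if len(current) == 4:  # att_loss, global_step, loss, rep_loss
            records.append(current)
            current = {}

print(records[0])  # {'att_loss': 165.96..., 'global_step': 999, ...}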
step_74124_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:670afc3cd0442862be4bf71075cc92c5a0462b42f3657793b7a62becff4f6b62
+ size 16193018
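
The weights file is stored as a Git LFS pointer (about 16 MB once fetched). Because it is named step_74124_pytorch_model.bin rather than pytorch_model.bin, from_pretrained will not pick it up automatically; a hedged sketch of loading it by hand, where strict=False is an assumption made to tolerate any distillation-specific keys in the checkpoint:

import torch
from transformers import BertConfig, BertForMaskedLM

config = BertConfig.from_json_file("config.json")
model = BertForMaskedLM(config)

# Load the raw state dict from the checkpoint and report key mismatches.
state_dict = torch.load("step_74124_pytorch_model.bin", map_location="cpu")
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print("missing:", missing, "unexpected:", unexpected)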
vocab.txt ADDED
The diff for this file is too large to render.