mohammadmahdinouri commited on
Commit
bb37344
·
verified ·
1 Parent(s): 99838bf

Training in progress, step 29000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dba92eda90ccd8855e50aee729cdc5933197c20a8987e4bbca2496ccfb9f46eb
3
  size 189211642
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b38bfb27a9fc22c4b4574f2214176972bfa73ac8d3142646b820761f674ca95
3
  size 189211642
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8f471b58470f0f0e383b2688276d66ec94091abd49fe394fff9a7d394d7d0dc
3
  size 363608098
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9176a26395979f62437009f3466faa938a707a4323d4e687b5d57a6d6f8f1c
3
  size 363608098
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e41142a223ed1ae8017110280e6ff1f68b98f6d47c8beb2d6f6523b2cabd70c5
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb28b82c42a9bb5bf55d64e97db9f27987fa81a7c592e21d815d9cffa7c0a5df
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c493128931a764e58051cbbc6ca0186c75c2810adca6d6eca3ccc349e82ae214
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e1aa25338743d0285d43b88c2600ca688a8287786133cac01ad1f618ed37739
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e134aa099db26f2c8d758c8b103d6bd472b8ea45ff8e39d7896c6430387d54a
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:884b90160675995812f0bd6498c004f5dca4bb18e90a7252a45f89ada159e10c
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db90ca38a63561dbf292acd617970b0ad5074b0e5381b45d37012d6f0663100c
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89ce837998bb8f91c98f4e51f479825ed96387b3201a4c2cea96eef1bf292192
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5cfceb803c7665261ab2df9397277d216fbd340170042b1ba5a8d1d0d822658
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cebc651d508c09e942cf4f665fc7edfe43526ad316e05d80134d3ca9d4a1b0b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.04147681149974225,
6
  "eval_steps": 500,
7
- "global_step": 28000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -9808,6 +9808,356 @@
9808
  "learning_rate": 4.9320921131134654e-05,
9809
  "loss": 39.6667,
9810
  "step": 28000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9811
  }
9812
  ],
9813
  "logging_steps": 20,
@@ -9827,7 +10177,7 @@
9827
  "attributes": {}
9828
  }
9829
  },
9830
- "total_flos": 1.8052908555278746e+19,
9831
  "train_batch_size": 48,
9832
  "trial_name": null,
9833
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.042958126196161614,
6
  "eval_steps": 500,
7
+ "global_step": 29000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
9808
  "learning_rate": 4.9320921131134654e-05,
9809
  "loss": 39.6667,
9810
  "step": 28000
9811
+ },
9812
+ {
9813
+ "epoch": 0.041506437793670636,
9814
+ "grad_norm": 79.0,
9815
+ "learning_rate": 4.93204272376339e-05,
9816
+ "loss": 39.7124,
9817
+ "step": 28020
9818
+ },
9819
+ {
9820
+ "epoch": 0.04153606408759902,
9821
+ "grad_norm": 56.0,
9822
+ "learning_rate": 4.931993334413314e-05,
9823
+ "loss": 39.702,
9824
+ "step": 28040
9825
+ },
9826
+ {
9827
+ "epoch": 0.041565690381527416,
9828
+ "grad_norm": 70.0,
9829
+ "learning_rate": 4.9319439450632384e-05,
9830
+ "loss": 39.6675,
9831
+ "step": 28060
9832
+ },
9833
+ {
9834
+ "epoch": 0.0415953166754558,
9835
+ "grad_norm": 58.25,
9836
+ "learning_rate": 4.931894555713163e-05,
9837
+ "loss": 39.603,
9838
+ "step": 28080
9839
+ },
9840
+ {
9841
+ "epoch": 0.04162494296938419,
9842
+ "grad_norm": 58.0,
9843
+ "learning_rate": 4.931845166363087e-05,
9844
+ "loss": 39.6839,
9845
+ "step": 28100
9846
+ },
9847
+ {
9848
+ "epoch": 0.041654569263312576,
9849
+ "grad_norm": 66.0,
9850
+ "learning_rate": 4.9317957770130115e-05,
9851
+ "loss": 39.7011,
9852
+ "step": 28120
9853
+ },
9854
+ {
9855
+ "epoch": 0.04168419555724096,
9856
+ "grad_norm": 63.25,
9857
+ "learning_rate": 4.931746387662936e-05,
9858
+ "loss": 39.702,
9859
+ "step": 28140
9860
+ },
9861
+ {
9862
+ "epoch": 0.04171382185116935,
9863
+ "grad_norm": 66.0,
9864
+ "learning_rate": 4.93169699831286e-05,
9865
+ "loss": 39.6368,
9866
+ "step": 28160
9867
+ },
9868
+ {
9869
+ "epoch": 0.041743448145097735,
9870
+ "grad_norm": 71.0,
9871
+ "learning_rate": 4.9316476089627845e-05,
9872
+ "loss": 39.6087,
9873
+ "step": 28180
9874
+ },
9875
+ {
9876
+ "epoch": 0.04177307443902612,
9877
+ "grad_norm": 76.5,
9878
+ "learning_rate": 4.931598219612709e-05,
9879
+ "loss": 39.6342,
9880
+ "step": 28200
9881
+ },
9882
+ {
9883
+ "epoch": 0.041802700732954515,
9884
+ "grad_norm": 79.5,
9885
+ "learning_rate": 4.9315488302626325e-05,
9886
+ "loss": 39.5706,
9887
+ "step": 28220
9888
+ },
9889
+ {
9890
+ "epoch": 0.0418323270268829,
9891
+ "grad_norm": 66.5,
9892
+ "learning_rate": 4.9314994409125575e-05,
9893
+ "loss": 39.6543,
9894
+ "step": 28240
9895
+ },
9896
+ {
9897
+ "epoch": 0.04186195332081129,
9898
+ "grad_norm": 58.5,
9899
+ "learning_rate": 4.931450051562482e-05,
9900
+ "loss": 39.5878,
9901
+ "step": 28260
9902
+ },
9903
+ {
9904
+ "epoch": 0.041891579614739674,
9905
+ "grad_norm": 59.0,
9906
+ "learning_rate": 4.931400662212406e-05,
9907
+ "loss": 39.6459,
9908
+ "step": 28280
9909
+ },
9910
+ {
9911
+ "epoch": 0.04192120590866806,
9912
+ "grad_norm": 49.25,
9913
+ "learning_rate": 4.93135127286233e-05,
9914
+ "loss": 39.5152,
9915
+ "step": 28300
9916
+ },
9917
+ {
9918
+ "epoch": 0.04195083220259645,
9919
+ "grad_norm": 64.0,
9920
+ "learning_rate": 4.931301883512255e-05,
9921
+ "loss": 39.5766,
9922
+ "step": 28320
9923
+ },
9924
+ {
9925
+ "epoch": 0.04198045849652483,
9926
+ "grad_norm": 58.0,
9927
+ "learning_rate": 4.931252494162179e-05,
9928
+ "loss": 39.5904,
9929
+ "step": 28340
9930
+ },
9931
+ {
9932
+ "epoch": 0.04201008479045322,
9933
+ "grad_norm": 58.25,
9934
+ "learning_rate": 4.9312031048121036e-05,
9935
+ "loss": 39.6196,
9936
+ "step": 28360
9937
+ },
9938
+ {
9939
+ "epoch": 0.04203971108438161,
9940
+ "grad_norm": 61.25,
9941
+ "learning_rate": 4.931153715462028e-05,
9942
+ "loss": 39.5726,
9943
+ "step": 28380
9944
+ },
9945
+ {
9946
+ "epoch": 0.04206933737831,
9947
+ "grad_norm": 61.75,
9948
+ "learning_rate": 4.931104326111952e-05,
9949
+ "loss": 39.5673,
9950
+ "step": 28400
9951
+ },
9952
+ {
9953
+ "epoch": 0.042098963672238386,
9954
+ "grad_norm": 74.0,
9955
+ "learning_rate": 4.9310549367618766e-05,
9956
+ "loss": 39.546,
9957
+ "step": 28420
9958
+ },
9959
+ {
9960
+ "epoch": 0.04212858996616677,
9961
+ "grad_norm": 60.75,
9962
+ "learning_rate": 4.9310055474118e-05,
9963
+ "loss": 39.5487,
9964
+ "step": 28440
9965
+ },
9966
+ {
9967
+ "epoch": 0.04215821626009516,
9968
+ "grad_norm": 52.0,
9969
+ "learning_rate": 4.930956158061725e-05,
9970
+ "loss": 39.5736,
9971
+ "step": 28460
9972
+ },
9973
+ {
9974
+ "epoch": 0.042187842554023545,
9975
+ "grad_norm": 72.5,
9976
+ "learning_rate": 4.9309067687116496e-05,
9977
+ "loss": 39.45,
9978
+ "step": 28480
9979
+ },
9980
+ {
9981
+ "epoch": 0.04221746884795193,
9982
+ "grad_norm": 75.5,
9983
+ "learning_rate": 4.930857379361574e-05,
9984
+ "loss": 39.5851,
9985
+ "step": 28500
9986
+ },
9987
+ {
9988
+ "epoch": 0.04224709514188032,
9989
+ "grad_norm": 98.5,
9990
+ "learning_rate": 4.9308079900114976e-05,
9991
+ "loss": 39.5567,
9992
+ "step": 28520
9993
+ },
9994
+ {
9995
+ "epoch": 0.04227672143580871,
9996
+ "grad_norm": 56.25,
9997
+ "learning_rate": 4.9307586006614227e-05,
9998
+ "loss": 39.4901,
9999
+ "step": 28540
10000
+ },
10001
+ {
10002
+ "epoch": 0.0423063477297371,
10003
+ "grad_norm": 61.0,
10004
+ "learning_rate": 4.930709211311347e-05,
10005
+ "loss": 39.4569,
10006
+ "step": 28560
10007
+ },
10008
+ {
10009
+ "epoch": 0.042335974023665485,
10010
+ "grad_norm": 64.0,
10011
+ "learning_rate": 4.930659821961271e-05,
10012
+ "loss": 39.5612,
10013
+ "step": 28580
10014
+ },
10015
+ {
10016
+ "epoch": 0.04236560031759387,
10017
+ "grad_norm": 58.25,
10018
+ "learning_rate": 4.930610432611195e-05,
10019
+ "loss": 39.4928,
10020
+ "step": 28600
10021
+ },
10022
+ {
10023
+ "epoch": 0.04239522661152226,
10024
+ "grad_norm": 63.75,
10025
+ "learning_rate": 4.93056104326112e-05,
10026
+ "loss": 39.5275,
10027
+ "step": 28620
10028
+ },
10029
+ {
10030
+ "epoch": 0.042424852905450644,
10031
+ "grad_norm": 68.0,
10032
+ "learning_rate": 4.9305116539110444e-05,
10033
+ "loss": 39.4773,
10034
+ "step": 28640
10035
+ },
10036
+ {
10037
+ "epoch": 0.04245447919937903,
10038
+ "grad_norm": 72.0,
10039
+ "learning_rate": 4.930462264560968e-05,
10040
+ "loss": 39.5318,
10041
+ "step": 28660
10042
+ },
10043
+ {
10044
+ "epoch": 0.04248410549330742,
10045
+ "grad_norm": 56.0,
10046
+ "learning_rate": 4.930412875210893e-05,
10047
+ "loss": 39.4474,
10048
+ "step": 28680
10049
+ },
10050
+ {
10051
+ "epoch": 0.04251373178723581,
10052
+ "grad_norm": 66.0,
10053
+ "learning_rate": 4.9303634858608174e-05,
10054
+ "loss": 39.485,
10055
+ "step": 28700
10056
+ },
10057
+ {
10058
+ "epoch": 0.0425433580811642,
10059
+ "grad_norm": 71.5,
10060
+ "learning_rate": 4.930314096510741e-05,
10061
+ "loss": 39.4602,
10062
+ "step": 28720
10063
+ },
10064
+ {
10065
+ "epoch": 0.04257298437509258,
10066
+ "grad_norm": 58.0,
10067
+ "learning_rate": 4.9302647071606654e-05,
10068
+ "loss": 39.4508,
10069
+ "step": 28740
10070
+ },
10071
+ {
10072
+ "epoch": 0.04260261066902097,
10073
+ "grad_norm": 62.5,
10074
+ "learning_rate": 4.9302153178105904e-05,
10075
+ "loss": 39.4066,
10076
+ "step": 28760
10077
+ },
10078
+ {
10079
+ "epoch": 0.042632236962949356,
10080
+ "grad_norm": 61.75,
10081
+ "learning_rate": 4.930165928460515e-05,
10082
+ "loss": 39.4411,
10083
+ "step": 28780
10084
+ },
10085
+ {
10086
+ "epoch": 0.04266186325687774,
10087
+ "grad_norm": 62.0,
10088
+ "learning_rate": 4.9301165391104384e-05,
10089
+ "loss": 39.4268,
10090
+ "step": 28800
10091
+ },
10092
+ {
10093
+ "epoch": 0.04269148955080613,
10094
+ "grad_norm": 54.0,
10095
+ "learning_rate": 4.930067149760363e-05,
10096
+ "loss": 39.3404,
10097
+ "step": 28820
10098
+ },
10099
+ {
10100
+ "epoch": 0.042721115844734515,
10101
+ "grad_norm": 56.75,
10102
+ "learning_rate": 4.930017760410288e-05,
10103
+ "loss": 39.3246,
10104
+ "step": 28840
10105
+ },
10106
+ {
10107
+ "epoch": 0.04275074213866291,
10108
+ "grad_norm": 58.25,
10109
+ "learning_rate": 4.929968371060212e-05,
10110
+ "loss": 39.3779,
10111
+ "step": 28860
10112
+ },
10113
+ {
10114
+ "epoch": 0.042780368432591295,
10115
+ "grad_norm": 64.0,
10116
+ "learning_rate": 4.929918981710136e-05,
10117
+ "loss": 39.4462,
10118
+ "step": 28880
10119
+ },
10120
+ {
10121
+ "epoch": 0.04280999472651968,
10122
+ "grad_norm": 66.0,
10123
+ "learning_rate": 4.92986959236006e-05,
10124
+ "loss": 39.3394,
10125
+ "step": 28900
10126
+ },
10127
+ {
10128
+ "epoch": 0.04283962102044807,
10129
+ "grad_norm": 57.25,
10130
+ "learning_rate": 4.929820203009985e-05,
10131
+ "loss": 39.3868,
10132
+ "step": 28920
10133
+ },
10134
+ {
10135
+ "epoch": 0.042869247314376455,
10136
+ "grad_norm": 78.0,
10137
+ "learning_rate": 4.929770813659909e-05,
10138
+ "loss": 39.3587,
10139
+ "step": 28940
10140
+ },
10141
+ {
10142
+ "epoch": 0.04289887360830484,
10143
+ "grad_norm": 63.25,
10144
+ "learning_rate": 4.929721424309833e-05,
10145
+ "loss": 39.4021,
10146
+ "step": 28960
10147
+ },
10148
+ {
10149
+ "epoch": 0.04292849990223323,
10150
+ "grad_norm": 51.5,
10151
+ "learning_rate": 4.929672034959758e-05,
10152
+ "loss": 39.313,
10153
+ "step": 28980
10154
+ },
10155
+ {
10156
+ "epoch": 0.042958126196161614,
10157
+ "grad_norm": 57.5,
10158
+ "learning_rate": 4.9296226456096825e-05,
10159
+ "loss": 39.3612,
10160
+ "step": 29000
10161
  }
10162
  ],
10163
  "logging_steps": 20,
 
10177
  "attributes": {}
10178
  }
10179
  },
10180
+ "total_flos": 1.869765281936782e+19,
10181
  "train_batch_size": 48,
10182
  "trial_name": null,
10183
  "trial_params": null