diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..e8dd95b11055693eb71da2b9bdbcdee95093d961 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-80000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-90000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-70000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-100000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-110000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-130000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-120000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-150000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-60000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-140000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-160000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-168144/trainer_state.json filter=lfs diff=lfs merge=lfs -text
diff --git a/checkpoint-10000/rng_state_4.pth b/checkpoint-10000/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..28f7e8868fd4df63a6756b111fe1ea4a8e7eb6e2
--- /dev/null
+++ b/checkpoint-10000/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:399c4700dab9ae7b754110ce307fb7e26e22cb49b5bb233c435f0f12b77c202f
+size 15984
diff --git a/checkpoint-100000/rng_state_3.pth b/checkpoint-100000/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9
--- /dev/null
+++ b/checkpoint-100000/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84
+size 15984
diff --git a/checkpoint-100000/rng_state_6.pth b/checkpoint-100000/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d9cc9104b41c399577a5b19f280ae6ba448edc4f
--- /dev/null
+++ b/checkpoint-100000/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25755ba07299ee6ff45936dd04df329596319c9f8095af71e6f3a219e7543e26
+size 15984
diff --git a/checkpoint-100000/trainer_state.json b/checkpoint-100000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cf08ec7042b3da56376bdb9b2ab96e5ab166c2a5
--- /dev/null
+++ b/checkpoint-100000/trainer_state.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94b3296cfba91b3d85433c52f73f4360316db6e8016670a8319248166d45ce97
+size 17536460
diff --git a/checkpoint-110000/rng_state_7.pth b/checkpoint-110000/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7736ce8fe27978c921d21f9fd6d1cda8c15a03f9
--- /dev/null
+++ b/checkpoint-110000/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:908f018cd701ed629c41299726da4a25f202f20a1d4bc2075a2266ed4013db3a
+size 15984
diff --git a/checkpoint-110000/trainer_state.json b/checkpoint-110000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3dac6e22d8f4d1f2523b16ab1eb77a83ab76b517 --- /dev/null +++ b/checkpoint-110000/trainer_state.json @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:6cbe4b60888b9a20e749a39c9202095897a81986432139cc12052aaba373c8f5 +size 19298676 diff --git a/checkpoint-120000/rng_state_0.pth b/checkpoint-120000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..8be4da50e08ef9215e5f46f9cc7ebcb8fd4593eb --- /dev/null +++ b/checkpoint-120000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955b895101e13c81ba510512313a06782795770a0bf998c90f718166d25f1664 +size 15984 diff --git a/checkpoint-120000/rng_state_1.pth b/checkpoint-120000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3450e76a148654c7a712cc0f7f4fbfb8af73468e --- /dev/null +++ b/checkpoint-120000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec +size 15984 diff --git a/checkpoint-120000/rng_state_6.pth b/checkpoint-120000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9cc9104b41c399577a5b19f280ae6ba448edc4f --- /dev/null +++ b/checkpoint-120000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25755ba07299ee6ff45936dd04df329596319c9f8095af71e6f3a219e7543e26 +size 15984 diff --git a/checkpoint-120000/rng_state_7.pth b/checkpoint-120000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..7736ce8fe27978c921d21f9fd6d1cda8c15a03f9 --- /dev/null +++ b/checkpoint-120000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908f018cd701ed629c41299726da4a25f202f20a1d4bc2075a2266ed4013db3a +size 15984 diff --git a/checkpoint-120000/scheduler.pt b/checkpoint-120000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e52026a1de16197826c58b573974d4d04245c5a8 --- /dev/null +++ b/checkpoint-120000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77a70af2bb64373017d7032bb8f7147abac42d23ddbd67e3a6a4aaa6a92b7190 +size 1064 diff --git a/checkpoint-120000/trainer_state.json b/checkpoint-120000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15eab27368bf8dfd741eb56966b97857a632ad8b --- /dev/null +++ b/checkpoint-120000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993df490285ebdfafcf544990feb3e287af648a1d83d34041aae53d3d802feb1 +size 21060347 diff --git a/checkpoint-130000/rng_state_1.pth b/checkpoint-130000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3450e76a148654c7a712cc0f7f4fbfb8af73468e --- /dev/null +++ b/checkpoint-130000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec +size 15984 diff --git a/checkpoint-130000/rng_state_3.pth b/checkpoint-130000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9 --- /dev/null +++ b/checkpoint-130000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84 +size 15984 diff --git a/checkpoint-130000/rng_state_5.pth b/checkpoint-130000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b81cf1f9e78f606b05125af5ec552416d94116f4 --- /dev/null +++ b/checkpoint-130000/rng_state_5.pth @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864ea2379cc907eb4189c52706cb978150d9c26e18abf74679590729a8f0c8e8 +size 15984 diff --git a/checkpoint-130000/rng_state_7.pth b/checkpoint-130000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..7736ce8fe27978c921d21f9fd6d1cda8c15a03f9 --- /dev/null +++ b/checkpoint-130000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908f018cd701ed629c41299726da4a25f202f20a1d4bc2075a2266ed4013db3a +size 15984 diff --git a/checkpoint-130000/trainer_state.json b/checkpoint-130000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5d8f298201578b5e75482695178e12721dc0fe77 --- /dev/null +++ b/checkpoint-130000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c7190fec1856b279f80b2acd81b6a56d3f939e97f15ea315a6c55456430680 +size 22814765 diff --git a/checkpoint-140000/rng_state_2.pth b/checkpoint-140000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b3bdae1f6c60868005426daf443380b27066cba --- /dev/null +++ b/checkpoint-140000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21c61b1a7e793bbdec183de3b52da90042305234bc7e5887986655cd3fc2192 +size 15984 diff --git a/checkpoint-140000/rng_state_4.pth b/checkpoint-140000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..28f7e8868fd4df63a6756b111fe1ea4a8e7eb6e2 --- /dev/null +++ b/checkpoint-140000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399c4700dab9ae7b754110ce307fb7e26e22cb49b5bb233c435f0f12b77c202f +size 15984 diff --git a/checkpoint-140000/rng_state_7.pth b/checkpoint-140000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..7736ce8fe27978c921d21f9fd6d1cda8c15a03f9 --- /dev/null +++ b/checkpoint-140000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908f018cd701ed629c41299726da4a25f202f20a1d4bc2075a2266ed4013db3a +size 15984 diff --git a/checkpoint-140000/scheduler.pt b/checkpoint-140000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..89ffb1508c775d23a46daa67ed0f6bea7440eac7 --- /dev/null +++ b/checkpoint-140000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f253f091177361c8be3f2e930d54158f465c06ecd7559dd797f06d351edf12b7 +size 1064 diff --git a/checkpoint-140000/trainer_state.json b/checkpoint-140000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..592933229b946e6caec13f6703fbf6a2520b67ac --- /dev/null +++ b/checkpoint-140000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4afc54e9b48f65fc658b6c50d37e7b5d80e2a52f58f339636428726cb1c94b81 +size 24571198 diff --git a/checkpoint-150000/rng_state_1.pth b/checkpoint-150000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3450e76a148654c7a712cc0f7f4fbfb8af73468e --- /dev/null +++ b/checkpoint-150000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec +size 15984 diff --git a/checkpoint-150000/rng_state_3.pth b/checkpoint-150000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9 --- /dev/null +++ 
b/checkpoint-150000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84 +size 15984 diff --git a/checkpoint-150000/rng_state_4.pth b/checkpoint-150000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..28f7e8868fd4df63a6756b111fe1ea4a8e7eb6e2 --- /dev/null +++ b/checkpoint-150000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399c4700dab9ae7b754110ce307fb7e26e22cb49b5bb233c435f0f12b77c202f +size 15984 diff --git a/checkpoint-150000/scheduler.pt b/checkpoint-150000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3ac53646ec1365189412c6d4ec909c5f13375a1 --- /dev/null +++ b/checkpoint-150000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bea14e25f2a7f84d371ef8fe19b4ffa2c20fbf0a696f18247df48a4aaebe6fc +size 1064 diff --git a/checkpoint-150000/trainer_state.json b/checkpoint-150000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..27bdee179b514a5d1421102d5276ec8438905959 --- /dev/null +++ b/checkpoint-150000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7ead47e80809a72ad6540965942da9094aa2208d0120e9d316011f3fe4f3e4 +size 26332314 diff --git a/checkpoint-160000/rng_state_0.pth b/checkpoint-160000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..8be4da50e08ef9215e5f46f9cc7ebcb8fd4593eb --- /dev/null +++ b/checkpoint-160000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955b895101e13c81ba510512313a06782795770a0bf998c90f718166d25f1664 +size 15984 diff --git a/checkpoint-160000/rng_state_1.pth b/checkpoint-160000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3450e76a148654c7a712cc0f7f4fbfb8af73468e --- /dev/null +++ b/checkpoint-160000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec +size 15984 diff --git a/checkpoint-160000/rng_state_2.pth b/checkpoint-160000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b3bdae1f6c60868005426daf443380b27066cba --- /dev/null +++ b/checkpoint-160000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21c61b1a7e793bbdec183de3b52da90042305234bc7e5887986655cd3fc2192 +size 15984 diff --git a/checkpoint-160000/rng_state_3.pth b/checkpoint-160000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9 --- /dev/null +++ b/checkpoint-160000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84 +size 15984 diff --git a/checkpoint-160000/rng_state_4.pth b/checkpoint-160000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..28f7e8868fd4df63a6756b111fe1ea4a8e7eb6e2 --- /dev/null +++ b/checkpoint-160000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399c4700dab9ae7b754110ce307fb7e26e22cb49b5bb233c435f0f12b77c202f +size 15984 diff --git a/checkpoint-160000/rng_state_5.pth b/checkpoint-160000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b81cf1f9e78f606b05125af5ec552416d94116f4 --- 
/dev/null +++ b/checkpoint-160000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864ea2379cc907eb4189c52706cb978150d9c26e18abf74679590729a8f0c8e8 +size 15984 diff --git a/checkpoint-160000/rng_state_7.pth b/checkpoint-160000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..7736ce8fe27978c921d21f9fd6d1cda8c15a03f9 --- /dev/null +++ b/checkpoint-160000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908f018cd701ed629c41299726da4a25f202f20a1d4bc2075a2266ed4013db3a +size 15984 diff --git a/checkpoint-160000/scheduler.pt b/checkpoint-160000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b38d402b4519c807dc5c0d2cd4a4229e3f6f5cab --- /dev/null +++ b/checkpoint-160000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:731f143540afcf9b80a64272108a9c11ac26849d842d12d4ec579b8a47a14c02 +size 1064 diff --git a/checkpoint-160000/trainer_state.json b/checkpoint-160000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..283b8b29ec3192f2f670f4d76c5bafd8678cd772 --- /dev/null +++ b/checkpoint-160000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216e5da91b5b5ee3f4b82a535cd7f763b93f7c753aa01b60fb0c5bfba8313727 +size 28090096 diff --git a/checkpoint-168144/rng_state_1.pth b/checkpoint-168144/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3450e76a148654c7a712cc0f7f4fbfb8af73468e --- /dev/null +++ b/checkpoint-168144/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec +size 15984 diff --git a/checkpoint-168144/rng_state_2.pth b/checkpoint-168144/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b3bdae1f6c60868005426daf443380b27066cba --- /dev/null +++ b/checkpoint-168144/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21c61b1a7e793bbdec183de3b52da90042305234bc7e5887986655cd3fc2192 +size 15984 diff --git a/checkpoint-168144/rng_state_3.pth b/checkpoint-168144/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9 --- /dev/null +++ b/checkpoint-168144/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84 +size 15984 diff --git a/checkpoint-168144/rng_state_4.pth b/checkpoint-168144/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..28f7e8868fd4df63a6756b111fe1ea4a8e7eb6e2 --- /dev/null +++ b/checkpoint-168144/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399c4700dab9ae7b754110ce307fb7e26e22cb49b5bb233c435f0f12b77c202f +size 15984 diff --git a/checkpoint-168144/rng_state_5.pth b/checkpoint-168144/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b81cf1f9e78f606b05125af5ec552416d94116f4 --- /dev/null +++ b/checkpoint-168144/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864ea2379cc907eb4189c52706cb978150d9c26e18abf74679590729a8f0c8e8 +size 15984 diff --git a/checkpoint-168144/rng_state_6.pth b/checkpoint-168144/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..d9cc9104b41c399577a5b19f280ae6ba448edc4f --- /dev/null +++ b/checkpoint-168144/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25755ba07299ee6ff45936dd04df329596319c9f8095af71e6f3a219e7543e26 +size 15984 diff --git a/checkpoint-168144/rng_state_7.pth b/checkpoint-168144/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..7736ce8fe27978c921d21f9fd6d1cda8c15a03f9 --- /dev/null +++ b/checkpoint-168144/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908f018cd701ed629c41299726da4a25f202f20a1d4bc2075a2266ed4013db3a +size 15984 diff --git a/checkpoint-168144/scheduler.pt b/checkpoint-168144/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f76a1a1aa589e1fe2011615c41e836b685293f37 --- /dev/null +++ b/checkpoint-168144/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f3abfb86c3a716132002fa3f0d0d10f7b28502b039c62c368f9e817f93511a +size 1064 diff --git a/checkpoint-168144/trainer_state.json b/checkpoint-168144/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..767c2a2e77e6569abc83b0e6deddb00af6ff5faa --- /dev/null +++ b/checkpoint-168144/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d85a901acda4da7aafc3d6baf7f9fd9a3c77f6132a3da2ed2921f9a40b462824 +size 29522959 diff --git a/checkpoint-20000/rng_state_0.pth b/checkpoint-20000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..8be4da50e08ef9215e5f46f9cc7ebcb8fd4593eb --- /dev/null +++ b/checkpoint-20000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955b895101e13c81ba510512313a06782795770a0bf998c90f718166d25f1664 +size 15984 diff --git a/checkpoint-20000/rng_state_1.pth b/checkpoint-20000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3450e76a148654c7a712cc0f7f4fbfb8af73468e --- /dev/null +++ b/checkpoint-20000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec +size 15984 diff --git a/checkpoint-20000/rng_state_3.pth b/checkpoint-20000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9 --- /dev/null +++ b/checkpoint-20000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84 +size 15984 diff --git a/checkpoint-20000/scheduler.pt b/checkpoint-20000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a9bd72b74e20c35ea6f869fc9f9e4e2b1ba359e --- /dev/null +++ b/checkpoint-20000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b4e5e645588d44ccac291aff2441d3eb20907e8b281244135ce511322173202 +size 1064 diff --git a/checkpoint-30000/rng_state_3.pth b/checkpoint-30000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9 --- /dev/null +++ b/checkpoint-30000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84 +size 15984 diff --git a/checkpoint-30000/rng_state_7.pth b/checkpoint-30000/rng_state_7.pth new file mode 100644 index 
0000000000000000000000000000000000000000..7736ce8fe27978c921d21f9fd6d1cda8c15a03f9 --- /dev/null +++ b/checkpoint-30000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908f018cd701ed629c41299726da4a25f202f20a1d4bc2075a2266ed4013db3a +size 15984 diff --git a/checkpoint-30000/scheduler.pt b/checkpoint-30000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0f4db67f3b19e1d5530cd80b6d17f9ae238047a --- /dev/null +++ b/checkpoint-30000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ac17e8507155f7414e20152345d2f5db88cecd158e7a02814b19d43988faee +size 1064 diff --git a/checkpoint-40000/rng_state_5.pth b/checkpoint-40000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b81cf1f9e78f606b05125af5ec552416d94116f4 --- /dev/null +++ b/checkpoint-40000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864ea2379cc907eb4189c52706cb978150d9c26e18abf74679590729a8f0c8e8 +size 15984 diff --git a/checkpoint-60000/rng_state_3.pth b/checkpoint-60000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9 --- /dev/null +++ b/checkpoint-60000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84 +size 15984 diff --git a/checkpoint-60000/scheduler.pt b/checkpoint-60000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..df0fc1cbe0c68e4584aa721487452526f0c69b0a --- /dev/null +++ b/checkpoint-60000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcebd3f938e91ea913e2057ecf107b9ad246ba1337158c0c87f5d254795a47c0 +size 1064 diff --git a/checkpoint-60000/trainer_state.json b/checkpoint-60000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..895bc29267793578079b7de8eadccf24db710e1c --- /dev/null +++ b/checkpoint-60000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd50d067cf406c83e0172807c3c1b78af83ca27ce1755fd6edc00f220d7f9bc0 +size 10524148 diff --git a/checkpoint-70000/rng_state_1.pth b/checkpoint-70000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3450e76a148654c7a712cc0f7f4fbfb8af73468e --- /dev/null +++ b/checkpoint-70000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec +size 15984 diff --git a/checkpoint-70000/trainer_state.json b/checkpoint-70000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e1afae31f123c45a0eeff84f18f86050e06778ca --- /dev/null +++ b/checkpoint-70000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4136876c2e2426601ffe902bfc79528124f4aeb56d48684d8ed3be3d2d23bff8 +size 12276143 diff --git a/checkpoint-80000/scheduler.pt b/checkpoint-80000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..128a22828626d50b218620a136eb10dc1fd3d850 --- /dev/null +++ b/checkpoint-80000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f3b67ae2794985196af21293251ae5b5b1e1f177e172218e270f7569cdec36 +size 1064 diff --git a/checkpoint-80000/trainer_state.json b/checkpoint-80000/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..12426daf67e77faf792b94244b56567dc481dd7a --- /dev/null +++ b/checkpoint-80000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9081f1eb392c8332dfbb4d6fcd776585ab97bea9f56f9558db63d6c6b98099c1 +size 14031756 diff --git a/checkpoint-90000/config.json b/checkpoint-90000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..87dd93f00e47ebb0e5d9883ccc75f5850ac6aedc --- /dev/null +++ b/checkpoint-90000/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "max_position_embeddings": 32768, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 180348 +} diff --git a/checkpoint-90000/generation_config.json b/checkpoint-90000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..676d263ba4b8835e0d1c53f899a2645043e2e9df --- /dev/null +++ b/checkpoint-90000/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.1, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.51.3" +} diff --git a/checkpoint-90000/model.safetensors.index.json b/checkpoint-90000/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..3237092e4fcd45e652baa15621e9e4b23aa11253 --- /dev/null +++ b/checkpoint-90000/model.safetensors.index.json @@ -0,0 +1,346 @@ +{ + "metadata": { + "total_size": 7457478656 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.bias": 
"model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": 
"model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.bias": 
"model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-90000/rng_state_1.pth b/checkpoint-90000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3450e76a148654c7a712cc0f7f4fbfb8af73468e --- /dev/null +++ b/checkpoint-90000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096e351ace65ff91008be171a45173ed6369cc639fce73a288f671041e24b0ec +size 15984 diff --git a/checkpoint-90000/rng_state_2.pth b/checkpoint-90000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b3bdae1f6c60868005426daf443380b27066cba --- /dev/null +++ b/checkpoint-90000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21c61b1a7e793bbdec183de3b52da90042305234bc7e5887986655cd3fc2192 +size 15984 diff --git a/checkpoint-90000/rng_state_3.pth b/checkpoint-90000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..b05597dd523f7c9a629beaa525e6ad7122b018f9 --- /dev/null +++ b/checkpoint-90000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348742498d81780506d9760c655a7a7555185b5fbd70a7ae296d88fd9aeecd84 +size 15984 diff --git a/checkpoint-90000/rng_state_4.pth b/checkpoint-90000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..28f7e8868fd4df63a6756b111fe1ea4a8e7eb6e2 --- /dev/null +++ b/checkpoint-90000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399c4700dab9ae7b754110ce307fb7e26e22cb49b5bb233c435f0f12b77c202f +size 15984 diff --git a/checkpoint-90000/rng_state_5.pth b/checkpoint-90000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b81cf1f9e78f606b05125af5ec552416d94116f4 --- /dev/null +++ b/checkpoint-90000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864ea2379cc907eb4189c52706cb978150d9c26e18abf74679590729a8f0c8e8 +size 15984 diff --git a/checkpoint-90000/rng_state_6.pth b/checkpoint-90000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9cc9104b41c399577a5b19f280ae6ba448edc4f --- /dev/null +++ b/checkpoint-90000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25755ba07299ee6ff45936dd04df329596319c9f8095af71e6f3a219e7543e26 +size 15984 diff --git a/checkpoint-90000/rng_state_7.pth b/checkpoint-90000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..7736ce8fe27978c921d21f9fd6d1cda8c15a03f9 --- /dev/null +++ b/checkpoint-90000/rng_state_7.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:908f018cd701ed629c41299726da4a25f202f20a1d4bc2075a2266ed4013db3a +size 15984 diff --git a/checkpoint-90000/trainer_state.json b/checkpoint-90000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3754faface84e046577fa0c69dbf61e6abfb11b1 --- /dev/null +++ b/checkpoint-90000/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d89782af592b189da9aca67572d0b32482e0e18a75d1cddc158497c8195d49 +size 15785048
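The directories added in this diff follow the standard Hugging Face Trainer checkpoint layout: per-rank rng_state_*.pth files, scheduler.pt, trainer_state.json, a Qwen2 config.json and generation_config.json, and sharded safetensors weights described by model.safetensors.index.json, all tracked as Git LFS pointers. As a minimal sketch of how such a checkpoint is typically consumed (assuming the LFS objects have been fetched, e.g. with `git lfs pull`, and that tokenizer files are available alongside the config, which this diff does not show), one of the checkpoint directories can be loaded with transformers:

```python
# Minimal sketch: load one of the Trainer checkpoint directories above as a causal LM.
# Assumptions (not shown in this diff): the LFS-backed safetensors shards have been
# fetched (e.g. `git lfs pull`) and any tokenizer files needed for inference exist
# next to config.json.
from transformers import AutoConfig, AutoModelForCausalLM

ckpt_dir = "checkpoint-90000"  # any checkpoint-* directory with config + weight shards

config = AutoConfig.from_pretrained(ckpt_dir)           # Qwen2 config shown in this diff
model = AutoModelForCausalLM.from_pretrained(ckpt_dir)  # resolves model.safetensors.index.json

print(config.model_type, config.vocab_size)  # "qwen2", 180348
```

The other common use is resuming training: passing the directory to `Trainer.train(resume_from_checkpoint="checkpoint-90000")` restores scheduler.pt, the per-rank rng_state_*.pth files, and trainer_state.json alongside the model weights.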