| { | |
| "train_loss": [ | |
| { | |
| "step": 1, | |
| "loss": 0.4917 | |
| }, | |
| { | |
| "step": 2, | |
| "loss": 0.6049 | |
| }, | |
| { | |
| "step": 3, | |
| "loss": 0.6407 | |
| }, | |
| { | |
| "step": 4, | |
| "loss": 0.3972 | |
| }, | |
| { | |
| "step": 5, | |
| "loss": 0.453 | |
| }, | |
| { | |
| "step": 6, | |
| "loss": 0.418 | |
| }, | |
| { | |
| "step": 7, | |
| "loss": 0.303 | |
| }, | |
| { | |
| "step": 8, | |
| "loss": 0.1847 | |
| }, | |
| { | |
| "step": 9, | |
| "loss": 0.3514 | |
| }, | |
| { | |
| "step": 10, | |
| "loss": 0.2434 | |
| }, | |
| { | |
| "step": 11, | |
| "loss": 0.1577 | |
| }, | |
| { | |
| "step": 12, | |
| "loss": 0.209 | |
| }, | |
| { | |
| "step": 13, | |
| "loss": 0.2123 | |
| }, | |
| { | |
| "step": 14, | |
| "loss": 0.2712 | |
| }, | |
| { | |
| "step": 15, | |
| "loss": 0.2975 | |
| }, | |
| { | |
| "step": 16, | |
| "loss": 0.3977 | |
| }, | |
| { | |
| "step": 17, | |
| "loss": 0.3933 | |
| }, | |
| { | |
| "step": 18, | |
| "loss": 0.2866 | |
| }, | |
| { | |
| "step": 19, | |
| "loss": 0.4174 | |
| }, | |
| { | |
| "step": 20, | |
| "loss": 0.3588 | |
| }, | |
| { | |
| "step": 21, | |
| "loss": 0.1919 | |
| }, | |
| { | |
| "step": 22, | |
| "loss": 0.475 | |
| }, | |
| { | |
| "step": 23, | |
| "loss": 0.223 | |
| }, | |
| { | |
| "step": 24, | |
| "loss": 0.3825 | |
| }, | |
| { | |
| "step": 25, | |
| "loss": 0.1645 | |
| }, | |
| { | |
| "step": 26, | |
| "loss": 0.2517 | |
| }, | |
| { | |
| "step": 27, | |
| "loss": 0.1865 | |
| }, | |
| { | |
| "step": 28, | |
| "loss": 0.2887 | |
| }, | |
| { | |
| "step": 29, | |
| "loss": 0.2154 | |
| }, | |
| { | |
| "step": 30, | |
| "loss": 0.3605 | |
| }, | |
| { | |
| "step": 31, | |
| "loss": 0.1299 | |
| }, | |
| { | |
| "step": 32, | |
| "loss": 0.1772 | |
| }, | |
| { | |
| "step": 33, | |
| "loss": 0.3281 | |
| }, | |
| { | |
| "step": 34, | |
| "loss": 0.1973 | |
| }, | |
| { | |
| "step": 35, | |
| "loss": 0.2645 | |
| }, | |
| { | |
| "step": 36, | |
| "loss": 0.1938 | |
| }, | |
| { | |
| "step": 37, | |
| "loss": 0.1577 | |
| }, | |
| { | |
| "step": 38, | |
| "loss": 0.1917 | |
| }, | |
| { | |
| "step": 39, | |
| "loss": 0.0703 | |
| }, | |
| { | |
| "step": 40, | |
| "loss": 0.1402 | |
| }, | |
| { | |
| "step": 41, | |
| "loss": 0.1274 | |
| }, | |
| { | |
| "step": 42, | |
| "loss": 0.1871 | |
| }, | |
| { | |
| "step": 43, | |
| "loss": 0.2981 | |
| }, | |
| { | |
| "step": 44, | |
| "loss": 0.0962 | |
| }, | |
| { | |
| "step": 45, | |
| "loss": 0.1616 | |
| }, | |
| { | |
| "step": 46, | |
| "loss": 0.0614 | |
| }, | |
| { | |
| "step": 47, | |
| "loss": 0.1468 | |
| }, | |
| { | |
| "step": 48, | |
| "loss": 0.0482 | |
| }, | |
| { | |
| "step": 49, | |
| "loss": 0.1177 | |
| }, | |
| { | |
| "step": 50, | |
| "loss": 0.0788 | |
| }, | |
| { | |
| "step": 51, | |
| "loss": 0.1786 | |
| }, | |
| { | |
| "step": 52, | |
| "loss": 0.1685 | |
| }, | |
| { | |
| "step": 53, | |
| "loss": 0.1178 | |
| }, | |
| { | |
| "step": 54, | |
| "loss": 0.1016 | |
| }, | |
| { | |
| "step": 55, | |
| "loss": 0.1223 | |
| }, | |
| { | |
| "step": 56, | |
| "loss": 0.0331 | |
| }, | |
| { | |
| "step": 57, | |
| "loss": 0.1314 | |
| }, | |
| { | |
| "step": 58, | |
| "loss": 0.0504 | |
| }, | |
| { | |
| "step": 59, | |
| "loss": 0.0327 | |
| }, | |
| { | |
| "step": 60, | |
| "loss": 0.0766 | |
| }, | |
| { | |
| "step": 61, | |
| "loss": 0.0167 | |
| }, | |
| { | |
| "step": 62, | |
| "loss": 0.0176 | |
| }, | |
| { | |
| "step": 63, | |
| "loss": 0.1523 | |
| }, | |
| { | |
| "step": 64, | |
| "loss": 0.0172 | |
| }, | |
| { | |
| "step": 65, | |
| "loss": 0.1337 | |
| }, | |
| { | |
| "step": 66, | |
| "loss": 0.18 | |
| }, | |
| { | |
| "step": 67, | |
| "loss": 0.0076 | |
| }, | |
| { | |
| "step": 68, | |
| "loss": 0.0051 | |
| }, | |
| { | |
| "step": 69, | |
| "loss": 0.0408 | |
| }, | |
| { | |
| "step": 70, | |
| "loss": 0.0744 | |
| }, | |
| { | |
| "step": 71, | |
| "loss": 0.0123 | |
| }, | |
| { | |
| "step": 72, | |
| "loss": 0.006 | |
| }, | |
| { | |
| "step": 73, | |
| "loss": 0.3016 | |
| }, | |
| { | |
| "step": 74, | |
| "loss": 0.1201 | |
| }, | |
| { | |
| "step": 75, | |
| "loss": 0.037 | |
| }, | |
| { | |
| "step": 76, | |
| "loss": 0.0413 | |
| }, | |
| { | |
| "step": 77, | |
| "loss": 0.0697 | |
| }, | |
| { | |
| "step": 78, | |
| "loss": 0.0485 | |
| }, | |
| { | |
| "step": 79, | |
| "loss": 0.0686 | |
| }, | |
| { | |
| "step": 80, | |
| "loss": 0.0649 | |
| }, | |
| { | |
| "step": 81, | |
| "loss": 0.1002 | |
| }, | |
| { | |
| "step": 82, | |
| "loss": 0.0235 | |
| }, | |
| { | |
| "step": 83, | |
| "loss": 0.1955 | |
| }, | |
| { | |
| "step": 84, | |
| "loss": 0.1692 | |
| }, | |
| { | |
| "step": 85, | |
| "loss": 0.0178 | |
| }, | |
| { | |
| "step": 86, | |
| "loss": 0.0282 | |
| }, | |
| { | |
| "step": 87, | |
| "loss": 0.0191 | |
| }, | |
| { | |
| "step": 88, | |
| "loss": 0.0177 | |
| }, | |
| { | |
| "step": 89, | |
| "loss": 0.0207 | |
| }, | |
| { | |
| "step": 90, | |
| "loss": 0.0082 | |
| }, | |
| { | |
| "step": 91, | |
| "loss": 0.0583 | |
| }, | |
| { | |
| "step": 92, | |
| "loss": 0.259 | |
| }, | |
| { | |
| "step": 93, | |
| "loss": 0.0295 | |
| }, | |
| { | |
| "step": 94, | |
| "loss": 0.0014 | |
| }, | |
| { | |
| "step": 95, | |
| "loss": 0.024 | |
| }, | |
| { | |
| "step": 96, | |
| "loss": 0.0795 | |
| }, | |
| { | |
| "step": 97, | |
| "loss": 0.1903 | |
| }, | |
| { | |
| "step": 98, | |
| "loss": 0.2885 | |
| }, | |
| { | |
| "step": 99, | |
| "loss": 0.2661 | |
| }, | |
| { | |
| "step": 100, | |
| "loss": 0.0502 | |
| }, | |
| { | |
| "step": 101, | |
| "loss": 0.0041 | |
| }, | |
| { | |
| "step": 102, | |
| "loss": 0.0947 | |
| }, | |
| { | |
| "step": 103, | |
| "loss": 0.1131 | |
| }, | |
| { | |
| "step": 104, | |
| "loss": 0.1105 | |
| }, | |
| { | |
| "step": 105, | |
| "loss": 0.0794 | |
| }, | |
| { | |
| "step": 106, | |
| "loss": 0.0057 | |
| }, | |
| { | |
| "step": 107, | |
| "loss": 0.3953 | |
| }, | |
| { | |
| "step": 108, | |
| "loss": 0.1099 | |
| }, | |
| { | |
| "step": 109, | |
| "loss": 0.2198 | |
| }, | |
| { | |
| "step": 110, | |
| "loss": 0.0586 | |
| }, | |
| { | |
| "step": 111, | |
| "loss": 0.3492 | |
| }, | |
| { | |
| "step": 112, | |
| "loss": 0.003 | |
| }, | |
| { | |
| "step": 113, | |
| "loss": 0.0365 | |
| }, | |
| { | |
| "step": 114, | |
| "loss": 0.1228 | |
| }, | |
| { | |
| "step": 115, | |
| "loss": 0.0174 | |
| }, | |
| { | |
| "step": 116, | |
| "loss": 0.0473 | |
| }, | |
| { | |
| "step": 117, | |
| "loss": 0.073 | |
| }, | |
| { | |
| "step": 118, | |
| "loss": 0.0736 | |
| }, | |
| { | |
| "step": 119, | |
| "loss": 0.3511 | |
| }, | |
| { | |
| "step": 120, | |
| "loss": 0.1691 | |
| }, | |
| { | |
| "step": 121, | |
| "loss": 0.2357 | |
| }, | |
| { | |
| "step": 122, | |
| "loss": 0.1983 | |
| }, | |
| { | |
| "step": 123, | |
| "loss": 0.043 | |
| }, | |
| { | |
| "step": 124, | |
| "loss": 0.0069 | |
| }, | |
| { | |
| "step": 125, | |
| "loss": 0.0348 | |
| }, | |
| { | |
| "step": 126, | |
| "loss": 0.0071 | |
| }, | |
| { | |
| "step": 127, | |
| "loss": 0.0153 | |
| }, | |
| { | |
| "step": 128, | |
| "loss": 0.0071 | |
| }, | |
| { | |
| "step": 129, | |
| "loss": 0.114 | |
| }, | |
| { | |
| "step": 130, | |
| "loss": 0.027 | |
| }, | |
| { | |
| "step": 131, | |
| "loss": 0.2155 | |
| }, | |
| { | |
| "step": 132, | |
| "loss": 0.0732 | |
| }, | |
| { | |
| "step": 133, | |
| "loss": 0.0479 | |
| }, | |
| { | |
| "step": 134, | |
| "loss": 0.1001 | |
| }, | |
| { | |
| "step": 135, | |
| "loss": 0.0795 | |
| }, | |
| { | |
| "step": 136, | |
| "loss": 0.0143 | |
| }, | |
| { | |
| "step": 137, | |
| "loss": 0.025 | |
| }, | |
| { | |
| "step": 138, | |
| "loss": 0.0375 | |
| }, | |
| { | |
| "step": 139, | |
| "loss": 0.0464 | |
| }, | |
| { | |
| "step": 140, | |
| "loss": 0.135 | |
| }, | |
| { | |
| "step": 141, | |
| "loss": 0.0437 | |
| }, | |
| { | |
| "step": 142, | |
| "loss": 0.1026 | |
| }, | |
| { | |
| "step": 143, | |
| "loss": 0.104 | |
| }, | |
| { | |
| "step": 144, | |
| "loss": 0.16 | |
| }, | |
| { | |
| "step": 145, | |
| "loss": 0.1841 | |
| }, | |
| { | |
| "step": 146, | |
| "loss": 0.2612 | |
| }, | |
| { | |
| "step": 147, | |
| "loss": 0.047 | |
| }, | |
| { | |
| "step": 148, | |
| "loss": 0.0236 | |
| }, | |
| { | |
| "step": 149, | |
| "loss": 0.061 | |
| }, | |
| { | |
| "step": 150, | |
| "loss": 0.3053 | |
| }, | |
| { | |
| "step": 151, | |
| "loss": 0.1405 | |
| }, | |
| { | |
| "step": 152, | |
| "loss": 0.1131 | |
| }, | |
| { | |
| "step": 153, | |
| "loss": 0.0147 | |
| }, | |
| { | |
| "step": 154, | |
| "loss": 0.0888 | |
| }, | |
| { | |
| "step": 155, | |
| "loss": 0.0136 | |
| }, | |
| { | |
| "step": 156, | |
| "loss": 0.0091 | |
| }, | |
| { | |
| "step": 157, | |
| "loss": 0.011 | |
| }, | |
| { | |
| "step": 158, | |
| "loss": 0.0063 | |
| }, | |
| { | |
| "step": 159, | |
| "loss": 0.014 | |
| }, | |
| { | |
| "step": 160, | |
| "loss": 0.0344 | |
| }, | |
| { | |
| "step": 161, | |
| "loss": 0.0981 | |
| }, | |
| { | |
| "step": 162, | |
| "loss": 0.1965 | |
| }, | |
| { | |
| "step": 163, | |
| "loss": 0.0887 | |
| }, | |
| { | |
| "step": 164, | |
| "loss": 0.0409 | |
| }, | |
| { | |
| "step": 165, | |
| "loss": 0.0588 | |
| }, | |
| { | |
| "step": 166, | |
| "loss": 0.0632 | |
| }, | |
| { | |
| "step": 167, | |
| "loss": 0.0903 | |
| }, | |
| { | |
| "step": 168, | |
| "loss": 0.0796 | |
| }, | |
| { | |
| "step": 169, | |
| "loss": 0.0415 | |
| }, | |
| { | |
| "step": 170, | |
| "loss": 0.0989 | |
| }, | |
| { | |
| "step": 171, | |
| "loss": 0.0974 | |
| }, | |
| { | |
| "step": 172, | |
| "loss": 0.0363 | |
| }, | |
| { | |
| "step": 173, | |
| "loss": 0.0086 | |
| }, | |
| { | |
| "step": 174, | |
| "loss": 0.005 | |
| }, | |
| { | |
| "step": 175, | |
| "loss": 0.1765 | |
| }, | |
| { | |
| "step": 176, | |
| "loss": 0.2432 | |
| }, | |
| { | |
| "step": 177, | |
| "loss": 0.0805 | |
| }, | |
| { | |
| "step": 178, | |
| "loss": 0.0227 | |
| }, | |
| { | |
| "step": 179, | |
| "loss": 0.3566 | |
| }, | |
| { | |
| "step": 180, | |
| "loss": 0.0232 | |
| }, | |
| { | |
| "step": 181, | |
| "loss": 0.1591 | |
| }, | |
| { | |
| "step": 182, | |
| "loss": 0.0734 | |
| }, | |
| { | |
| "step": 183, | |
| "loss": 0.0099 | |
| }, | |
| { | |
| "step": 184, | |
| "loss": 0.0127 | |
| }, | |
| { | |
| "step": 185, | |
| "loss": 0.0028 | |
| }, | |
| { | |
| "step": 186, | |
| "loss": 0.0022 | |
| }, | |
| { | |
| "step": 187, | |
| "loss": 0.0082 | |
| }, | |
| { | |
| "step": 188, | |
| "loss": 0.006 | |
| }, | |
| { | |
| "step": 189, | |
| "loss": 0.0506 | |
| }, | |
| { | |
| "step": 190, | |
| "loss": 0.0107 | |
| }, | |
| { | |
| "step": 191, | |
| "loss": 0.0287 | |
| }, | |
| { | |
| "step": 192, | |
| "loss": 0.0021 | |
| }, | |
| { | |
| "step": 193, | |
| "loss": 0.1491 | |
| }, | |
| { | |
| "step": 194, | |
| "loss": 0.1414 | |
| }, | |
| { | |
| "step": 195, | |
| "loss": 0.003 | |
| }, | |
| { | |
| "step": 196, | |
| "loss": 0.0506 | |
| }, | |
| { | |
| "step": 197, | |
| "loss": 0.0182 | |
| }, | |
| { | |
| "step": 198, | |
| "loss": 0.073 | |
| }, | |
| { | |
| "step": 199, | |
| "loss": 0.0265 | |
| }, | |
| { | |
| "step": 200, | |
| "loss": 0.0079 | |
| }, | |
| { | |
| "step": 201, | |
| "loss": 0.0569 | |
| }, | |
| { | |
| "step": 202, | |
| "loss": 0.0138 | |
| }, | |
| { | |
| "step": 203, | |
| "loss": 0.0001 | |
| }, | |
| { | |
| "step": 204, | |
| "loss": 0.2567 | |
| }, | |
| { | |
| "step": 205, | |
| "loss": 0.0002 | |
| }, | |
| { | |
| "step": 206, | |
| "loss": 0.0142 | |
| }, | |
| { | |
| "step": 207, | |
| "loss": 0.0546 | |
| }, | |
| { | |
| "step": 208, | |
| "loss": 0.0485 | |
| }, | |
| { | |
| "step": 209, | |
| "loss": 0.1518 | |
| }, | |
| { | |
| "step": 210, | |
| "loss": 0.1869 | |
| }, | |
| { | |
| "step": 211, | |
| "loss": 0.2424 | |
| }, | |
| { | |
| "step": 212, | |
| "loss": 0.0379 | |
| }, | |
| { | |
| "step": 213, | |
| "loss": 0.0088 | |
| }, | |
| { | |
| "step": 214, | |
| "loss": 0.001 | |
| }, | |
| { | |
| "step": 215, | |
| "loss": 0.0102 | |
| }, | |
| { | |
| "step": 216, | |
| "loss": 0.1525 | |
| }, | |
| { | |
| "step": 217, | |
| "loss": 0.0018 | |
| }, | |
| { | |
| "step": 218, | |
| "loss": 0.0026 | |
| }, | |
| { | |
| "step": 219, | |
| "loss": 0.0031 | |
| }, | |
| { | |
| "step": 220, | |
| "loss": 0.0206 | |
| }, | |
| { | |
| "step": 221, | |
| "loss": 0.1205 | |
| }, | |
| { | |
| "step": 222, | |
| "loss": 0.01 | |
| }, | |
| { | |
| "step": 223, | |
| "loss": 0.0597 | |
| }, | |
| { | |
| "step": 224, | |
| "loss": 0.1187 | |
| }, | |
| { | |
| "step": 225, | |
| "loss": 0.0635 | |
| }, | |
| { | |
| "step": 226, | |
| "loss": 0.1923 | |
| }, | |
| { | |
| "step": 227, | |
| "loss": 0.0098 | |
| }, | |
| { | |
| "step": 228, | |
| "loss": 0.0003 | |
| }, | |
| { | |
| "step": 229, | |
| "loss": 0.0434 | |
| }, | |
| { | |
| "step": 230, | |
| "loss": 0.005 | |
| }, | |
| { | |
| "step": 231, | |
| "loss": 0.0029 | |
| }, | |
| { | |
| "step": 232, | |
| "loss": 0.0043 | |
| }, | |
| { | |
| "step": 233, | |
| "loss": 0.0057 | |
| }, | |
| { | |
| "step": 234, | |
| "loss": 0.0003 | |
| }, | |
| { | |
| "step": 235, | |
| "loss": 0.0005 | |
| }, | |
| { | |
| "step": 236, | |
| "loss": 0.0244 | |
| }, | |
| { | |
| "step": 237, | |
| "loss": 0.0776 | |
| }, | |
| { | |
| "step": 238, | |
| "loss": 0.0203 | |
| }, | |
| { | |
| "step": 239, | |
| "loss": 0.2391 | |
| }, | |
| { | |
| "step": 240, | |
| "loss": 0.0533 | |
| }, | |
| { | |
| "step": 241, | |
| "loss": 0.0536 | |
| }, | |
| { | |
| "step": 242, | |
| "loss": 0.0045 | |
| }, | |
| { | |
| "step": 243, | |
| "loss": 0.0131 | |
| }, | |
| { | |
| "step": 244, | |
| "loss": 0.0037 | |
| }, | |
| { | |
| "step": 245, | |
| "loss": 0.0182 | |
| }, | |
| { | |
| "step": 246, | |
| "loss": 0.0078 | |
| }, | |
| { | |
| "step": 247, | |
| "loss": 0.0413 | |
| }, | |
| { | |
| "step": 248, | |
| "loss": 0.1189 | |
| }, | |
| { | |
| "step": 249, | |
| "loss": 0.0267 | |
| }, | |
| { | |
| "step": 250, | |
| "loss": 0.0507 | |
| }, | |
| { | |
| "step": 251, | |
| "loss": 0.0261 | |
| }, | |
| { | |
| "step": 252, | |
| "loss": 0.0043 | |
| }, | |
| { | |
| "step": 253, | |
| "loss": 0.1643 | |
| }, | |
| { | |
| "step": 254, | |
| "loss": 0.4695 | |
| }, | |
| { | |
| "step": 255, | |
| "loss": 0.0008 | |
| }, | |
| { | |
| "step": 256, | |
| "loss": 0.0066 | |
| }, | |
| { | |
| "step": 257, | |
| "loss": 0.0042 | |
| }, | |
| { | |
| "step": 258, | |
| "loss": 0.2334 | |
| }, | |
| { | |
| "step": 259, | |
| "loss": 0.0046 | |
| }, | |
| { | |
| "step": 260, | |
| "loss": 0.0207 | |
| }, | |
| { | |
| "step": 261, | |
| "loss": 0.0356 | |
| }, | |
| { | |
| "step": 262, | |
| "loss": 0.0008 | |
| }, | |
| { | |
| "step": 263, | |
| "loss": 0.0371 | |
| }, | |
| { | |
| "step": 264, | |
| "loss": 0.0009 | |
| }, | |
| { | |
| "step": 265, | |
| "loss": 0.0082 | |
| }, | |
| { | |
| "step": 266, | |
| "loss": 0.0259 | |
| }, | |
| { | |
| "step": 267, | |
| "loss": 0.0008 | |
| }, | |
| { | |
| "step": 268, | |
| "loss": 0.0002 | |
| }, | |
| { | |
| "step": 269, | |
| "loss": 0.0011 | |
| }, | |
| { | |
| "step": 270, | |
| "loss": 0.0207 | |
| }, | |
| { | |
| "step": 271, | |
| "loss": 0.001 | |
| }, | |
| { | |
| "step": 272, | |
| "loss": 0.0017 | |
| }, | |
| { | |
| "step": 273, | |
| "loss": 0.0622 | |
| }, | |
| { | |
| "step": 274, | |
| "loss": 0.0083 | |
| }, | |
| { | |
| "step": 275, | |
| "loss": 0.0025 | |
| }, | |
| { | |
| "step": 276, | |
| "loss": 0.0006 | |
| }, | |
| { | |
| "step": 277, | |
| "loss": 0.0452 | |
| }, | |
| { | |
| "step": 278, | |
| "loss": 0.0009 | |
| }, | |
| { | |
| "step": 279, | |
| "loss": 0.0005 | |
| }, | |
| { | |
| "step": 280, | |
| "loss": 0.0005 | |
| }, | |
| { | |
| "step": 281, | |
| "loss": 0.0019 | |
| }, | |
| { | |
| "step": 282, | |
| "loss": 0.062 | |
| }, | |
| { | |
| "step": 283, | |
| "loss": 0.0027 | |
| }, | |
| { | |
| "step": 284, | |
| "loss": 0.0376 | |
| }, | |
| { | |
| "step": 285, | |
| "loss": 0.0 | |
| }, | |
| { | |
| "step": 286, | |
| "loss": 0.001 | |
| }, | |
| { | |
| "step": 287, | |
| "loss": 0.0219 | |
| }, | |
| { | |
| "step": 288, | |
| "loss": 0.0077 | |
| }, | |
| { | |
| "step": 289, | |
| "loss": 0.0055 | |
| }, | |
| { | |
| "step": 290, | |
| "loss": 0.1378 | |
| }, | |
| { | |
| "step": 291, | |
| "loss": 0.0117 | |
| }, | |
| { | |
| "step": 292, | |
| "loss": 0.0976 | |
| }, | |
| { | |
| "step": 293, | |
| "loss": 0.0026 | |
| }, | |
| { | |
| "step": 294, | |
| "loss": 0.0037 | |
| }, | |
| { | |
| "step": 295, | |
| "loss": 0.113 | |
| }, | |
| { | |
| "step": 296, | |
| "loss": 0.047 | |
| }, | |
| { | |
| "step": 297, | |
| "loss": 0.0004 | |
| }, | |
| { | |
| "step": 298, | |
| "loss": 0.0001 | |
| }, | |
| { | |
| "step": 299, | |
| "loss": 0.0001 | |
| }, | |
| { | |
| "step": 300, | |
| "loss": 0.0045 | |
| }, | |
| { | |
| "step": 301, | |
| "loss": 0.1614 | |
| }, | |
| { | |
| "step": 302, | |
| "loss": 0.259 | |
| }, | |
| { | |
| "step": 303, | |
| "loss": 0.0775 | |
| }, | |
| { | |
| "step": 304, | |
| "loss": 0.0084 | |
| }, | |
| { | |
| "step": 305, | |
| "loss": 0.0198 | |
| }, | |
| { | |
| "step": 306, | |
| "loss": 0.0668 | |
| }, | |
| { | |
| "step": 307, | |
| "loss": 0.0047 | |
| }, | |
| { | |
| "step": 308, | |
| "loss": 0.2898 | |
| }, | |
| { | |
| "step": 309, | |
| "loss": 0.0158 | |
| }, | |
| { | |
| "step": 310, | |
| "loss": 0.3233 | |
| }, | |
| { | |
| "step": 311, | |
| "loss": 0.0631 | |
| }, | |
| { | |
| "step": 312, | |
| "loss": 0.0017 | |
| }, | |
| { | |
| "step": 313, | |
| "loss": 0.0414 | |
| }, | |
| { | |
| "step": 314, | |
| "loss": 0.0002 | |
| }, | |
| { | |
| "step": 315, | |
| "loss": 0.0002 | |
| }, | |
| { | |
| "step": 316, | |
| "loss": 0.0124 | |
| }, | |
| { | |
| "step": 317, | |
| "loss": 0.0658 | |
| }, | |
| { | |
| "step": 318, | |
| "loss": 0.0008 | |
| }, | |
| { | |
| "step": 319, | |
| "loss": 0.0021 | |
| }, | |
| { | |
| "step": 320, | |
| "loss": 0.0011 | |
| } | |
| ], | |
| "eval_loss": [ | |
| { | |
| "step": 200, | |
| "eval_loss": 0.07848864793777466 | |
| } | |
| ], | |
| "args": { | |
| "lora_r": 16, | |
| "lora_alpha": 16, | |
| "lora_dropout": 0.05, | |
| "lora_target_modules": "q_proj,v_proj", | |
| "tuning_strategy": "lora", | |
| "num_trainable_layers": 2, | |
| "output_dir": "sft_prefill/prompt_id_3/qwen2-VL-7B-Instruct-syn-count-lora", | |
| "num_train_epochs": 2, | |
| "learning_rate": 0.0002, | |
| "per_device_train_batch_size": 16, | |
| "per_device_eval_batch_size": 16, | |
| "gradient_accumulation_steps": 1, | |
| "logging_steps": 10, | |
| "eval_steps": 200, | |
| "save_steps": 200, | |
| "warmup_ratio": 0.03, | |
| "weight_decay": 0.0, | |
| "max_grad_norm": 0.3, | |
| "lr_scheduler_type": "constant", | |
| "bf16": true, | |
| "tf32": true, | |
| "gradient_checkpointing": true, | |
| "optim": "adamw_torch_fused", | |
| "ft_type": "SFT_Prefill", | |
| "data_type": "small", | |
| "prompt_id": 3 | |
| } | |
| } |