{
  "best_metric": 1.3700028657913208,
  "best_model_checkpoint": "/app/finetuned_weights/checkpoint-800",
  "epoch": 0.4591434105746467,
  "eval_steps": 100,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005739292632183084,
      "grad_norm": 0.43322139978408813,
      "learning_rate": 0.0002,
      "loss": 2.051,
      "mean_token_accuracy": 0.6683308390900493,
      "step": 10
    },
    {
      "epoch": 0.011478585264366167,
      "grad_norm": 0.40237197279930115,
      "learning_rate": 0.0002,
      "loss": 1.7549,
      "mean_token_accuracy": 0.6822433151304722,
      "step": 20
    },
    {
      "epoch": 0.01721787789654925,
      "grad_norm": 0.43123266100883484,
      "learning_rate": 0.0002,
      "loss": 1.5103,
      "mean_token_accuracy": 0.7066726513206959,
      "step": 30
    },
    {
      "epoch": 0.022957170528732335,
      "grad_norm": 0.16466361284255981,
      "learning_rate": 0.0002,
      "loss": 1.4444,
      "mean_token_accuracy": 0.7159745823591948,
      "step": 40
    },
    {
      "epoch": 0.02869646316091542,
      "grad_norm": 0.3186506927013397,
      "learning_rate": 0.0002,
      "loss": 1.4808,
      "mean_token_accuracy": 0.7143966030329466,
      "step": 50
    },
    {
      "epoch": 0.0344357557930985,
      "grad_norm": 0.49696969985961914,
      "learning_rate": 0.0002,
      "loss": 1.5015,
      "mean_token_accuracy": 0.7038091894239187,
      "step": 60
    },
    {
      "epoch": 0.040175048425281586,
      "grad_norm": 0.2998158037662506,
      "learning_rate": 0.0002,
      "loss": 1.345,
      "mean_token_accuracy": 0.7281523209065199,
      "step": 70
    },
    {
      "epoch": 0.04591434105746467,
      "grad_norm": 0.2879635989665985,
      "learning_rate": 0.0002,
      "loss": 1.4034,
      "mean_token_accuracy": 0.7154877178370953,
      "step": 80
    },
    {
      "epoch": 0.05165363368964775,
      "grad_norm": 0.16925716400146484,
      "learning_rate": 0.0002,
      "loss": 1.6293,
      "mean_token_accuracy": 0.6787833951413631,
      "step": 90
    },
    {
      "epoch": 0.05739292632183084,
      "grad_norm": 0.2507581412792206,
      "learning_rate": 0.0002,
      "loss": 1.3949,
      "mean_token_accuracy": 0.7227997560054064,
      "step": 100
    },
    {
      "epoch": 0.05739292632183084,
      "eval_loss": 1.4279941320419312,
      "eval_mean_token_accuracy": 0.7138351793184543,
      "eval_runtime": 4534.8411,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.384,
      "step": 100
    },
    {
      "epoch": 0.06313221895401391,
      "grad_norm": 0.24556811153888702,
      "learning_rate": 0.0002,
      "loss": 1.3832,
      "mean_token_accuracy": 0.7172763034701347,
      "step": 110
    },
    {
      "epoch": 0.068871511586197,
      "grad_norm": 0.24670149385929108,
      "learning_rate": 0.0002,
      "loss": 1.4245,
      "mean_token_accuracy": 0.7150326510891318,
      "step": 120
    },
    {
      "epoch": 0.07461080421838008,
      "grad_norm": 0.27052056789398193,
      "learning_rate": 0.0002,
      "loss": 1.2266,
      "mean_token_accuracy": 0.7490846037864685,
      "step": 130
    },
    {
      "epoch": 0.08035009685056317,
      "grad_norm": 0.2287702113389969,
      "learning_rate": 0.0002,
      "loss": 1.4237,
      "mean_token_accuracy": 0.7114990293979645,
      "step": 140
    },
    {
      "epoch": 0.08608938948274625,
      "grad_norm": 0.2251950353384018,
      "learning_rate": 0.0002,
      "loss": 1.4939,
      "mean_token_accuracy": 0.7060536827892065,
      "step": 150
    },
    {
      "epoch": 0.09182868211492934,
      "grad_norm": 0.2458341121673584,
      "learning_rate": 0.0002,
      "loss": 1.4142,
      "mean_token_accuracy": 0.7157001797109842,
      "step": 160
    },
    {
      "epoch": 0.09756797474711242,
      "grad_norm": 0.1824209988117218,
      "learning_rate": 0.0002,
      "loss": 1.2918,
      "mean_token_accuracy": 0.7333439949899911,
      "step": 170
    },
    {
      "epoch": 0.1033072673792955,
      "grad_norm": 0.23465971648693085,
      "learning_rate": 0.0002,
      "loss": 1.3675,
      "mean_token_accuracy": 0.7236015398055315,
      "step": 180
    },
    {
      "epoch": 0.10904656001147858,
      "grad_norm": 0.2167435586452484,
      "learning_rate": 0.0002,
      "loss": 1.4979,
      "mean_token_accuracy": 0.7010378727689386,
      "step": 190
    },
    {
      "epoch": 0.11478585264366167,
      "grad_norm": 0.24258023500442505,
      "learning_rate": 0.0002,
      "loss": 1.3437,
      "mean_token_accuracy": 0.7294836457818746,
      "step": 200
    },
    {
      "epoch": 0.11478585264366167,
      "eval_loss": 1.402819037437439,
      "eval_mean_token_accuracy": 0.716681150906536,
      "eval_runtime": 4414.3312,
      "eval_samples_per_second": 0.789,
      "eval_steps_per_second": 0.395,
      "step": 200
    },
    {
      "epoch": 0.12052514527584475,
      "grad_norm": 0.24051423370838165,
      "learning_rate": 0.0002,
      "loss": 1.3826,
      "mean_token_accuracy": 0.7158944692462683,
      "step": 210
    },
    {
      "epoch": 0.12626443790802783,
      "grad_norm": 0.25226420164108276,
      "learning_rate": 0.0002,
      "loss": 1.2985,
      "mean_token_accuracy": 0.733695725724101,
      "step": 220
    },
    {
      "epoch": 0.13200373054021092,
      "grad_norm": 0.2106948047876358,
      "learning_rate": 0.0002,
      "loss": 1.3394,
      "mean_token_accuracy": 0.7303649850189686,
      "step": 230
    },
    {
      "epoch": 0.137743023172394,
      "grad_norm": 0.21339824795722961,
      "learning_rate": 0.0002,
      "loss": 1.3752,
      "mean_token_accuracy": 0.7253331538289786,
      "step": 240
    },
    {
      "epoch": 0.1434823158045771,
      "grad_norm": 0.2484087496995926,
      "learning_rate": 0.0002,
      "loss": 1.4659,
      "mean_token_accuracy": 0.7037245020270347,
      "step": 250
    },
    {
      "epoch": 0.14922160843676016,
      "grad_norm": 0.24411025643348694,
      "learning_rate": 0.0002,
      "loss": 1.3957,
      "mean_token_accuracy": 0.7138343520462513,
      "step": 260
    },
    {
      "epoch": 0.15496090106894325,
      "grad_norm": 0.2551439702510834,
      "learning_rate": 0.0002,
      "loss": 1.3914,
      "mean_token_accuracy": 0.713682159781456,
      "step": 270
    },
    {
      "epoch": 0.16070019370112634,
      "grad_norm": 0.258771151304245,
      "learning_rate": 0.0002,
      "loss": 1.4153,
      "mean_token_accuracy": 0.7100905137136578,
      "step": 280
    },
    {
      "epoch": 0.16643948633330943,
      "grad_norm": 0.20730619132518768,
      "learning_rate": 0.0002,
      "loss": 1.3201,
      "mean_token_accuracy": 0.733038941025734,
      "step": 290
    },
    {
      "epoch": 0.1721787789654925,
      "grad_norm": 0.17945091426372528,
      "learning_rate": 0.0002,
      "loss": 1.5173,
      "mean_token_accuracy": 0.6967695135623216,
      "step": 300
    },
    {
      "epoch": 0.1721787789654925,
      "eval_loss": 1.3912384510040283,
      "eval_mean_token_accuracy": 0.7176746699191754,
      "eval_runtime": 4400.1017,
      "eval_samples_per_second": 0.792,
      "eval_steps_per_second": 0.396,
      "step": 300
    },
    {
      "epoch": 0.1779180715976756,
      "grad_norm": 0.25128230452537537,
      "learning_rate": 0.0002,
      "loss": 1.4181,
      "mean_token_accuracy": 0.715370923653245,
      "step": 310
    },
    {
      "epoch": 0.18365736422985868,
      "grad_norm": 0.22883553802967072,
      "learning_rate": 0.0002,
      "loss": 1.4089,
      "mean_token_accuracy": 0.718804694339633,
      "step": 320
    },
    {
      "epoch": 0.18939665686204174,
      "grad_norm": 0.2196984440088272,
      "learning_rate": 0.0002,
      "loss": 1.2547,
      "mean_token_accuracy": 0.7411983285099268,
      "step": 330
    },
    {
      "epoch": 0.19513594949422483,
      "grad_norm": 0.374326229095459,
      "learning_rate": 0.0002,
      "loss": 1.552,
      "mean_token_accuracy": 0.6875970430672169,
      "step": 340
    },
    {
      "epoch": 0.20087524212640792,
      "grad_norm": 0.6579405665397644,
      "learning_rate": 0.0002,
      "loss": 1.3655,
      "mean_token_accuracy": 0.7201284021139145,
      "step": 350
    },
    {
      "epoch": 0.206614534758591,
      "grad_norm": 0.2102547287940979,
      "learning_rate": 0.0002,
      "loss": 1.3867,
      "mean_token_accuracy": 0.7191131260246039,
      "step": 360
    },
    {
      "epoch": 0.21235382739077407,
      "grad_norm": 0.26832231879234314,
      "learning_rate": 0.0002,
      "loss": 1.4089,
      "mean_token_accuracy": 0.7181429363787174,
      "step": 370
    },
    {
      "epoch": 0.21809312002295717,
      "grad_norm": 0.25602883100509644,
      "learning_rate": 0.0002,
      "loss": 1.3422,
      "mean_token_accuracy": 0.7277179971337319,
      "step": 380
    },
    {
      "epoch": 0.22383241265514026,
      "grad_norm": 0.2577485144138336,
      "learning_rate": 0.0002,
      "loss": 1.4422,
      "mean_token_accuracy": 0.7109258253127336,
      "step": 390
    },
    {
      "epoch": 0.22957170528732335,
      "grad_norm": 0.2750665247440338,
      "learning_rate": 0.0002,
      "loss": 1.2633,
      "mean_token_accuracy": 0.7383943419903517,
      "step": 400
    },
    {
      "epoch": 0.22957170528732335,
      "eval_loss": 1.38496732711792,
      "eval_mean_token_accuracy": 0.7186302011869997,
      "eval_runtime": 4400.5889,
      "eval_samples_per_second": 0.792,
      "eval_steps_per_second": 0.396,
      "step": 400
    },
    {
      "epoch": 0.2353109979195064,
      "grad_norm": 0.23557531833648682,
      "learning_rate": 0.0002,
      "loss": 1.3842,
      "mean_token_accuracy": 0.7180797912180423,
      "step": 410
    },
    {
      "epoch": 0.2410502905516895,
      "grad_norm": 0.2660968601703644,
      "learning_rate": 0.0002,
      "loss": 1.401,
      "mean_token_accuracy": 0.7145325090736151,
      "step": 420
    },
    {
      "epoch": 0.2467895831838726,
      "grad_norm": 0.2272387444972992,
      "learning_rate": 0.0002,
      "loss": 1.4956,
      "mean_token_accuracy": 0.6978571161627769,
      "step": 430
    },
    {
      "epoch": 0.25252887581605565,
      "grad_norm": 0.2202438861131668,
      "learning_rate": 0.0002,
      "loss": 1.3757,
      "mean_token_accuracy": 0.7206571504473687,
      "step": 440
    },
    {
      "epoch": 0.25826816844823874,
      "grad_norm": 0.24659469723701477,
      "learning_rate": 0.0002,
      "loss": 1.388,
      "mean_token_accuracy": 0.7190396279096604,
      "step": 450
    },
    {
      "epoch": 0.26400746108042183,
      "grad_norm": 0.20384320616722107,
      "learning_rate": 0.0002,
      "loss": 1.2807,
      "mean_token_accuracy": 0.7384566117078066,
      "step": 460
    },
    {
      "epoch": 0.2697467537126049,
      "grad_norm": 0.2716342806816101,
      "learning_rate": 0.0002,
      "loss": 1.3157,
      "mean_token_accuracy": 0.7307378999888897,
      "step": 470
    },
    {
      "epoch": 0.275486046344788,
      "grad_norm": 0.2534655034542084,
      "learning_rate": 0.0002,
      "loss": 1.3959,
      "mean_token_accuracy": 0.7178040158003569,
      "step": 480
    },
    {
      "epoch": 0.2812253389769711,
      "grad_norm": 0.21825498342514038,
      "learning_rate": 0.0002,
      "loss": 1.387,
      "mean_token_accuracy": 0.7204662635922432,
      "step": 490
    },
    {
      "epoch": 0.2869646316091542,
      "grad_norm": 0.2534162402153015,
      "learning_rate": 0.0002,
      "loss": 1.2776,
      "mean_token_accuracy": 0.7423365503549576,
      "step": 500
    },
    {
      "epoch": 0.2869646316091542,
      "eval_loss": 1.3787877559661865,
      "eval_mean_token_accuracy": 0.7194652643548437,
      "eval_runtime": 7761.4408,
      "eval_samples_per_second": 0.449,
      "eval_steps_per_second": 0.225,
      "step": 500
    },
    {
      "epoch": 0.29270392424133723,
      "grad_norm": 0.23998941481113434,
      "learning_rate": 0.0002,
      "loss": 1.2277,
      "mean_token_accuracy": 0.7432656295597553,
      "step": 510
    },
    {
      "epoch": 0.2984432168735203,
      "grad_norm": 0.23271049559116364,
      "learning_rate": 0.0002,
      "loss": 1.2776,
      "mean_token_accuracy": 0.7337239418178797,
      "step": 520
    },
    {
      "epoch": 0.3041825095057034,
      "grad_norm": 0.2755042016506195,
      "learning_rate": 0.0002,
      "loss": 1.4288,
      "mean_token_accuracy": 0.7133219081908464,
      "step": 530
    },
    {
      "epoch": 0.3099218021378865,
      "grad_norm": 0.21231453120708466,
      "learning_rate": 0.0002,
      "loss": 1.4229,
      "mean_token_accuracy": 0.7119937628507614,
      "step": 540
    },
    {
      "epoch": 0.3156610947700696,
      "grad_norm": 0.2159433215856552,
      "learning_rate": 0.0002,
      "loss": 1.3043,
      "mean_token_accuracy": 0.7337923284620047,
      "step": 550
    },
    {
      "epoch": 0.3214003874022527,
      "grad_norm": 0.238509863615036,
      "learning_rate": 0.0002,
      "loss": 1.3429,
      "mean_token_accuracy": 0.7277234088629484,
      "step": 560
    },
    {
      "epoch": 0.3271396800344358,
      "grad_norm": 0.27093520760536194,
      "learning_rate": 0.0002,
      "loss": 1.3089,
      "mean_token_accuracy": 0.7319676581770181,
      "step": 570
    },
    {
      "epoch": 0.33287897266661887,
      "grad_norm": 0.21662364900112152,
      "learning_rate": 0.0002,
      "loss": 1.3448,
      "mean_token_accuracy": 0.72448665574193,
      "step": 580
    },
    {
      "epoch": 0.3386182652988019,
      "grad_norm": 0.25919026136398315,
      "learning_rate": 0.0002,
      "loss": 1.3413,
      "mean_token_accuracy": 0.7278214626014232,
      "step": 590
    },
    {
      "epoch": 0.344357557930985,
      "grad_norm": 0.2097223550081253,
      "learning_rate": 0.0002,
      "loss": 1.3812,
      "mean_token_accuracy": 0.7174393549561501,
      "step": 600
    },
    {
      "epoch": 0.344357557930985,
      "eval_loss": 1.3745734691619873,
      "eval_mean_token_accuracy": 0.7201109681998733,
      "eval_runtime": 6592.6776,
      "eval_samples_per_second": 0.529,
      "eval_steps_per_second": 0.264,
      "step": 600
    },
    {
      "epoch": 0.3500968505631681,
      "grad_norm": 0.27309486269950867,
      "learning_rate": 0.0002,
      "loss": 1.3764,
      "mean_token_accuracy": 0.7236971091479063,
      "step": 610
    },
    {
      "epoch": 0.3558361431953512,
      "grad_norm": 0.2800423204898834,
      "learning_rate": 0.0002,
      "loss": 1.4141,
      "mean_token_accuracy": 0.7132530447095633,
      "step": 620
    },
    {
      "epoch": 0.36157543582753426,
      "grad_norm": 0.32200849056243896,
      "learning_rate": 0.0002,
      "loss": 1.5291,
      "mean_token_accuracy": 0.6955816943198443,
      "step": 630
    },
    {
      "epoch": 0.36731472845971735,
      "grad_norm": 0.1762777417898178,
      "learning_rate": 0.0002,
      "loss": 1.2729,
      "mean_token_accuracy": 0.7379875779151917,
      "step": 640
    },
    {
      "epoch": 0.37305402109190045,
      "grad_norm": 0.27259498834609985,
      "learning_rate": 0.0002,
      "loss": 1.3315,
      "mean_token_accuracy": 0.7254374325275421,
      "step": 650
    },
    {
      "epoch": 0.3787933137240835,
      "grad_norm": 0.3148305118083954,
      "learning_rate": 0.0002,
      "loss": 1.4009,
      "mean_token_accuracy": 0.7169190965592861,
      "step": 660
    },
    {
      "epoch": 0.38453260635626657,
      "grad_norm": 0.2222924679517746,
      "learning_rate": 0.0002,
      "loss": 1.4422,
      "mean_token_accuracy": 0.7057445451617241,
      "step": 670
    },
    {
      "epoch": 0.39027189898844966,
      "grad_norm": 0.30782487988471985,
      "learning_rate": 0.0002,
      "loss": 1.3916,
      "mean_token_accuracy": 0.7196735937148333,
      "step": 680
    },
    {
      "epoch": 0.39601119162063275,
      "grad_norm": 0.24766255915164948,
      "learning_rate": 0.0002,
      "loss": 1.3191,
      "mean_token_accuracy": 0.7313117351382971,
      "step": 690
    },
    {
      "epoch": 0.40175048425281584,
      "grad_norm": 0.26929622888565063,
      "learning_rate": 0.0002,
      "loss": 1.3379,
      "mean_token_accuracy": 0.7244091514497996,
      "step": 700
    },
    {
      "epoch": 0.40175048425281584,
      "eval_loss": 1.370943307876587,
      "eval_mean_token_accuracy": 0.720665924306649,
      "eval_runtime": 4504.2557,
      "eval_samples_per_second": 0.774,
      "eval_steps_per_second": 0.387,
      "step": 700
    },
    {
      "epoch": 0.40748977688499893,
      "grad_norm": 0.22724460065364838,
      "learning_rate": 0.0002,
      "loss": 1.3541,
      "mean_token_accuracy": 0.7258114762604236,
      "step": 710
    },
    {
      "epoch": 0.413229069517182,
      "grad_norm": 0.22957713901996613,
      "learning_rate": 0.0002,
      "loss": 1.3855,
      "mean_token_accuracy": 0.7198538523167372,
      "step": 720
    },
    {
      "epoch": 0.4189683621493651,
      "grad_norm": 0.23907890915870667,
      "learning_rate": 0.0002,
      "loss": 1.427,
      "mean_token_accuracy": 0.7128315325826406,
      "step": 730
    },
    {
      "epoch": 0.42470765478154815,
      "grad_norm": 0.2534020245075226,
      "learning_rate": 0.0002,
      "loss": 1.3615,
      "mean_token_accuracy": 0.7198588822036982,
      "step": 740
    },
    {
      "epoch": 0.43044694741373124,
      "grad_norm": 0.24605919420719147,
      "learning_rate": 0.0002,
      "loss": 1.3972,
      "mean_token_accuracy": 0.7115087192505598,
      "step": 750
    },
    {
      "epoch": 0.43618624004591433,
      "grad_norm": 0.2243734747171402,
      "learning_rate": 0.0002,
      "loss": 1.2523,
      "mean_token_accuracy": 0.7429927971214056,
      "step": 760
    },
    {
      "epoch": 0.4419255326780974,
      "grad_norm": 0.230802983045578,
      "learning_rate": 0.0002,
      "loss": 1.4611,
      "mean_token_accuracy": 0.7064661320298911,
      "step": 770
    },
    {
      "epoch": 0.4476648253102805,
      "grad_norm": 0.23596514761447906,
      "learning_rate": 0.0002,
      "loss": 1.3961,
      "mean_token_accuracy": 0.7153716452419758,
      "step": 780
    },
    {
      "epoch": 0.4534041179424636,
      "grad_norm": 0.29981374740600586,
      "learning_rate": 0.0002,
      "loss": 1.3467,
      "mean_token_accuracy": 0.722964895889163,
      "step": 790
    },
    {
      "epoch": 0.4591434105746467,
      "grad_norm": 0.25450626015663147,
      "learning_rate": 0.0002,
      "loss": 1.3928,
      "mean_token_accuracy": 0.7192676767706871,
      "step": 800
    },
    {
      "epoch": 0.4591434105746467,
      "eval_loss": 1.3700028657913208,
      "eval_mean_token_accuracy": 0.720344717953647,
      "eval_runtime": 5677.4803,
      "eval_samples_per_second": 0.614,
      "eval_steps_per_second": 0.307,
      "step": 800
    }
  ],
  "logging_steps": 10,
  "max_steps": 3484,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.3950907424144466e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}