|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6837606837606838, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017094017094017096, |
|
"grad_norm": 35.00202204303521, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7455849647521973, |
|
"logits/rejected": -2.7442612648010254, |
|
"logps/chosen": -164.2725830078125, |
|
"logps/rejected": -170.57113647460938, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.23749999701976776, |
|
"rewards/chosen": 0.0026612328365445137, |
|
"rewards/margins": -0.001539617427624762, |
|
"rewards/rejected": 0.004200850613415241, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03418803418803419, |
|
"grad_norm": 36.29266486314593, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.709902763366699, |
|
"logits/rejected": -2.7155404090881348, |
|
"logps/chosen": -171.80032348632812, |
|
"logps/rejected": -165.20169067382812, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.012009668163955212, |
|
"rewards/margins": 0.0021203968208283186, |
|
"rewards/rejected": 0.009889272041618824, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05128205128205128, |
|
"grad_norm": 33.83921269470837, |
|
"learning_rate": 9.999177507263144e-07, |
|
"logits/chosen": -2.6502068042755127, |
|
"logits/rejected": -2.628007411956787, |
|
"logps/chosen": -174.082275390625, |
|
"logps/rejected": -174.13429260253906, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.23495244979858398, |
|
"rewards/margins": 0.1125468835234642, |
|
"rewards/rejected": 0.12240554392337799, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06837606837606838, |
|
"grad_norm": 34.14427373918799, |
|
"learning_rate": 9.996710299650301e-07, |
|
"logits/chosen": -2.473665714263916, |
|
"logits/rejected": -2.4469008445739746, |
|
"logps/chosen": -158.2163848876953, |
|
"logps/rejected": -158.0710906982422, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.4233472943305969, |
|
"rewards/margins": 0.1434161365032196, |
|
"rewards/rejected": 0.2799311578273773, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08547008547008547, |
|
"grad_norm": 33.2696083475879, |
|
"learning_rate": 9.992599188865604e-07, |
|
"logits/chosen": -2.314507007598877, |
|
"logits/rejected": -2.3168132305145264, |
|
"logps/chosen": -150.67019653320312, |
|
"logps/rejected": -156.8417510986328, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.4975205063819885, |
|
"rewards/margins": 0.15743504464626312, |
|
"rewards/rejected": 0.3400854766368866, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.10256410256410256, |
|
"grad_norm": 34.42253361988952, |
|
"learning_rate": 9.98684552745256e-07, |
|
"logits/chosen": -2.243194103240967, |
|
"logits/rejected": -2.251340866088867, |
|
"logps/chosen": -161.2266845703125, |
|
"logps/rejected": -161.32298278808594, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.4243805408477783, |
|
"rewards/margins": 0.2635195851325989, |
|
"rewards/rejected": 0.16086098551750183, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11965811965811966, |
|
"grad_norm": 31.414296706456245, |
|
"learning_rate": 9.979451208349055e-07, |
|
"logits/chosen": -2.30315899848938, |
|
"logits/rejected": -2.289762496948242, |
|
"logps/chosen": -171.71713256835938, |
|
"logps/rejected": -174.50900268554688, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.019384615123271942, |
|
"rewards/margins": 0.318477988243103, |
|
"rewards/rejected": -0.3378625512123108, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.13675213675213677, |
|
"grad_norm": 32.071830655862556, |
|
"learning_rate": 9.970418664264595e-07, |
|
"logits/chosen": -2.3935599327087402, |
|
"logits/rejected": -2.3812546730041504, |
|
"logps/chosen": -171.0698699951172, |
|
"logps/rejected": -176.58578491210938, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.26089853048324585, |
|
"rewards/margins": 0.5235068202018738, |
|
"rewards/rejected": -0.7844053506851196, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 36.19466541168301, |
|
"learning_rate": 9.95975086687994e-07, |
|
"logits/chosen": -2.4914021492004395, |
|
"logits/rejected": -2.4973323345184326, |
|
"logps/chosen": -163.68099975585938, |
|
"logps/rejected": -167.174072265625, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2962096929550171, |
|
"rewards/margins": 0.4588828682899475, |
|
"rewards/rejected": -0.7550925016403198, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.17094017094017094, |
|
"grad_norm": 31.16276115760231, |
|
"learning_rate": 9.947451325869439e-07, |
|
"logits/chosen": -2.5575203895568848, |
|
"logits/rejected": -2.557717800140381, |
|
"logps/chosen": -172.04318237304688, |
|
"logps/rejected": -177.67672729492188, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2069791853427887, |
|
"rewards/margins": 0.6018465757369995, |
|
"rewards/rejected": -0.808825671672821, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17094017094017094, |
|
"eval_logits/chosen": -2.5221025943756104, |
|
"eval_logits/rejected": -2.5152711868286133, |
|
"eval_logps/chosen": -163.01820373535156, |
|
"eval_logps/rejected": -169.54832458496094, |
|
"eval_loss": 0.5812540650367737, |
|
"eval_rewards/accuracies": 0.6682692170143127, |
|
"eval_rewards/chosen": -0.45408713817596436, |
|
"eval_rewards/margins": 0.6127156615257263, |
|
"eval_rewards/rejected": -1.0668028593063354, |
|
"eval_runtime": 510.3361, |
|
"eval_samples_per_second": 16.291, |
|
"eval_steps_per_second": 0.255, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18803418803418803, |
|
"grad_norm": 31.575578721339145, |
|
"learning_rate": 9.933524087746347e-07, |
|
"logits/chosen": -2.490377426147461, |
|
"logits/rejected": -2.4825081825256348, |
|
"logps/chosen": -168.06161499023438, |
|
"logps/rejected": -175.0494384765625, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5060762763023376, |
|
"rewards/margins": 0.7589826583862305, |
|
"rewards/rejected": -1.2650587558746338, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.20512820512820512, |
|
"grad_norm": 30.171745273288415, |
|
"learning_rate": 9.917973734531549e-07, |
|
"logits/chosen": -2.48228120803833, |
|
"logits/rejected": -2.4833157062530518, |
|
"logps/chosen": -159.47142028808594, |
|
"logps/rejected": -170.63671875, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.35752761363983154, |
|
"rewards/margins": 0.5991309881210327, |
|
"rewards/rejected": -0.9566585421562195, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 32.13878319029882, |
|
"learning_rate": 9.90080538224607e-07, |
|
"logits/chosen": -2.585407018661499, |
|
"logits/rejected": -2.5767769813537598, |
|
"logps/chosen": -157.43936157226562, |
|
"logps/rejected": -166.13589477539062, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.02057185396552086, |
|
"rewards/margins": 0.47568243741989136, |
|
"rewards/rejected": -0.4962543547153473, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.23931623931623933, |
|
"grad_norm": 29.494674721856043, |
|
"learning_rate": 9.882024679227938e-07, |
|
"logits/chosen": -2.6504979133605957, |
|
"logits/rejected": -2.6398470401763916, |
|
"logps/chosen": -178.0801239013672, |
|
"logps/rejected": -179.46328735351562, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.43436694145202637, |
|
"rewards/margins": 0.8427752256393433, |
|
"rewards/rejected": -1.27714204788208, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 28.856733948308104, |
|
"learning_rate": 9.861637804273881e-07, |
|
"logits/chosen": -2.660489082336426, |
|
"logits/rejected": -2.655539035797119, |
|
"logps/chosen": -162.1233673095703, |
|
"logps/rejected": -170.16131591796875, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4032784402370453, |
|
"rewards/margins": 0.6959114074707031, |
|
"rewards/rejected": -1.0991899967193604, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.27350427350427353, |
|
"grad_norm": 26.646061534818323, |
|
"learning_rate": 9.83965146460653e-07, |
|
"logits/chosen": -2.6391615867614746, |
|
"logits/rejected": -2.628577709197998, |
|
"logps/chosen": -168.58099365234375, |
|
"logps/rejected": -179.22805786132812, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6665827035903931, |
|
"rewards/margins": 0.8240470886230469, |
|
"rewards/rejected": -1.4906299114227295, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2905982905982906, |
|
"grad_norm": 36.04159750418885, |
|
"learning_rate": 9.816072893667758e-07, |
|
"logits/chosen": -2.6322970390319824, |
|
"logits/rejected": -2.6053385734558105, |
|
"logps/chosen": -174.82640075683594, |
|
"logps/rejected": -186.0735626220703, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0639268159866333, |
|
"rewards/margins": 1.0258175134658813, |
|
"rewards/rejected": -2.0897443294525146, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 26.922939193632168, |
|
"learning_rate": 9.790909848738904e-07, |
|
"logits/chosen": -2.60801362991333, |
|
"logits/rejected": -2.6101624965667725, |
|
"logps/chosen": -176.20538330078125, |
|
"logps/rejected": -184.7812957763672, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9929834604263306, |
|
"rewards/margins": 0.8646324276924133, |
|
"rewards/rejected": -1.8576160669326782, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3247863247863248, |
|
"grad_norm": 30.3564450245371, |
|
"learning_rate": 9.764170608388647e-07, |
|
"logits/chosen": -2.6054036617279053, |
|
"logits/rejected": -2.5733799934387207, |
|
"logps/chosen": -168.037109375, |
|
"logps/rejected": -174.51144409179688, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6652337312698364, |
|
"rewards/margins": 1.060430884361267, |
|
"rewards/rejected": -1.725664734840393, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.3418803418803419, |
|
"grad_norm": 28.936164680674203, |
|
"learning_rate": 9.735863969749371e-07, |
|
"logits/chosen": -2.5255179405212402, |
|
"logits/rejected": -2.4874520301818848, |
|
"logps/chosen": -177.73861694335938, |
|
"logps/rejected": -189.82369995117188, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8511013984680176, |
|
"rewards/margins": 1.1354777812957764, |
|
"rewards/rejected": -1.986579179763794, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3418803418803419, |
|
"eval_logits/chosen": -2.484687328338623, |
|
"eval_logits/rejected": -2.460559368133545, |
|
"eval_logps/chosen": -168.28323364257812, |
|
"eval_logps/rejected": -180.8539276123047, |
|
"eval_loss": 0.5161151885986328, |
|
"eval_rewards/accuracies": 0.7211538553237915, |
|
"eval_rewards/chosen": -0.9805887937545776, |
|
"eval_rewards/margins": 1.2167747020721436, |
|
"eval_rewards/rejected": -2.1973636150360107, |
|
"eval_runtime": 510.3447, |
|
"eval_samples_per_second": 16.291, |
|
"eval_steps_per_second": 0.255, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.358974358974359, |
|
"grad_norm": 31.089971589067016, |
|
"learning_rate": 9.705999245622956e-07, |
|
"logits/chosen": -2.4702706336975098, |
|
"logits/rejected": -2.4523651599884033, |
|
"logps/chosen": -170.59246826171875, |
|
"logps/rejected": -182.99813842773438, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8999192118644714, |
|
"rewards/margins": 0.8702341318130493, |
|
"rewards/rejected": -1.770153284072876, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.37606837606837606, |
|
"grad_norm": 27.339023914835686, |
|
"learning_rate": 9.674586261416873e-07, |
|
"logits/chosen": -2.4866347312927246, |
|
"logits/rejected": -2.4518179893493652, |
|
"logps/chosen": -179.46290588378906, |
|
"logps/rejected": -188.7920379638672, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6938365697860718, |
|
"rewards/margins": 1.0765600204467773, |
|
"rewards/rejected": -1.7703965902328491, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.39316239316239315, |
|
"grad_norm": 31.312902469600562, |
|
"learning_rate": 9.641635351911664e-07, |
|
"logits/chosen": -2.4456398487091064, |
|
"logits/rejected": -2.426159381866455, |
|
"logps/chosen": -170.3855438232422, |
|
"logps/rejected": -181.9676513671875, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8490931391716003, |
|
"rewards/margins": 1.2224478721618652, |
|
"rewards/rejected": -2.0715408325195312, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.41025641025641024, |
|
"grad_norm": 25.966469642807997, |
|
"learning_rate": 9.607157357860821e-07, |
|
"logits/chosen": -2.4072113037109375, |
|
"logits/rejected": -2.3874144554138184, |
|
"logps/chosen": -187.41197204589844, |
|
"logps/rejected": -201.69454956054688, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0798847675323486, |
|
"rewards/margins": 1.3365159034729004, |
|
"rewards/rejected": -2.41640043258667, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42735042735042733, |
|
"grad_norm": 32.18242375190423, |
|
"learning_rate": 9.571163622424225e-07, |
|
"logits/chosen": -2.2766659259796143, |
|
"logits/rejected": -2.252072811126709, |
|
"logps/chosen": -174.78514099121094, |
|
"logps/rejected": -187.40646362304688, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.5247443914413452, |
|
"rewards/margins": 1.2177503108978271, |
|
"rewards/rejected": -2.742494821548462, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 30.579550576640443, |
|
"learning_rate": 9.533665987436261e-07, |
|
"logits/chosen": -2.182610034942627, |
|
"logits/rejected": -2.128113269805908, |
|
"logps/chosen": -178.1033477783203, |
|
"logps/rejected": -197.4458465576172, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5623412132263184, |
|
"rewards/margins": 1.2608497142791748, |
|
"rewards/rejected": -2.823190689086914, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 29.287644373971865, |
|
"learning_rate": 9.494676789509899e-07, |
|
"logits/chosen": -2.1067867279052734, |
|
"logits/rejected": -2.0683400630950928, |
|
"logps/chosen": -176.67918395996094, |
|
"logps/rejected": -193.65371704101562, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1053364276885986, |
|
"rewards/margins": 1.3829355239868164, |
|
"rewards/rejected": -2.488272190093994, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.47863247863247865, |
|
"grad_norm": 27.563555703636343, |
|
"learning_rate": 9.454208855977985e-07, |
|
"logits/chosen": -2.0855822563171387, |
|
"logits/rejected": -2.013296127319336, |
|
"logps/chosen": -178.40390014648438, |
|
"logps/rejected": -196.03305053710938, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.443866491317749, |
|
"rewards/margins": 1.600778341293335, |
|
"rewards/rejected": -3.044644832611084, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.49572649572649574, |
|
"grad_norm": 30.234814125811326, |
|
"learning_rate": 9.41227550067308e-07, |
|
"logits/chosen": -2.0734238624572754, |
|
"logits/rejected": -2.0634400844573975, |
|
"logps/chosen": -179.51080322265625, |
|
"logps/rejected": -191.87046813964844, |
|
"loss": 0.4798, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6011661291122437, |
|
"rewards/margins": 1.445229172706604, |
|
"rewards/rejected": -3.0463955402374268, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 31.371346339775513, |
|
"learning_rate": 9.36889051954725e-07, |
|
"logits/chosen": -2.127821683883667, |
|
"logits/rejected": -2.080082416534424, |
|
"logps/chosen": -180.66383361816406, |
|
"logps/rejected": -196.1031494140625, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6335647106170654, |
|
"rewards/margins": 1.5422546863555908, |
|
"rewards/rejected": -3.1758196353912354, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"eval_logits/chosen": -2.1472573280334473, |
|
"eval_logits/rejected": -2.0990829467773438, |
|
"eval_logps/chosen": -173.8290557861328, |
|
"eval_logps/rejected": -191.683349609375, |
|
"eval_loss": 0.47699737548828125, |
|
"eval_rewards/accuracies": 0.754807710647583, |
|
"eval_rewards/chosen": -1.5351712703704834, |
|
"eval_rewards/margins": 1.7451337575912476, |
|
"eval_rewards/rejected": -3.2803049087524414, |
|
"eval_runtime": 510.7048, |
|
"eval_samples_per_second": 16.279, |
|
"eval_steps_per_second": 0.255, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5299145299145299, |
|
"grad_norm": 27.790777356361556, |
|
"learning_rate": 9.324068186133245e-07, |
|
"logits/chosen": -2.1372084617614746, |
|
"logits/rejected": -2.124948024749756, |
|
"logps/chosen": -172.3369598388672, |
|
"logps/rejected": -186.5850372314453, |
|
"loss": 0.4644, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3583369255065918, |
|
"rewards/margins": 1.7439367771148682, |
|
"rewards/rejected": -3.102273464202881, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.5470085470085471, |
|
"grad_norm": 27.612583401785376, |
|
"learning_rate": 9.277823246848536e-07, |
|
"logits/chosen": -2.2635793685913086, |
|
"logits/rejected": -2.2123026847839355, |
|
"logps/chosen": -186.25137329101562, |
|
"logps/rejected": -196.69564819335938, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2192834615707397, |
|
"rewards/margins": 1.371063470840454, |
|
"rewards/rejected": -2.5903468132019043, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5641025641025641, |
|
"grad_norm": 28.168886287584876, |
|
"learning_rate": 9.230170916143793e-07, |
|
"logits/chosen": -2.3309006690979004, |
|
"logits/rejected": -2.2978808879852295, |
|
"logps/chosen": -174.7559814453125, |
|
"logps/rejected": -195.68280029296875, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0855658054351807, |
|
"rewards/margins": 1.6249233484268188, |
|
"rewards/rejected": -2.710489273071289, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.5811965811965812, |
|
"grad_norm": 28.93959851544435, |
|
"learning_rate": 9.181126871497378e-07, |
|
"logits/chosen": -2.376833915710449, |
|
"logits/rejected": -2.340681552886963, |
|
"logps/chosen": -175.3675537109375, |
|
"logps/rejected": -194.9619903564453, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9624043703079224, |
|
"rewards/margins": 1.7745708227157593, |
|
"rewards/rejected": -2.7369751930236816, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5982905982905983, |
|
"grad_norm": 30.43477724579486, |
|
"learning_rate": 9.130707248257491e-07, |
|
"logits/chosen": -2.458378553390503, |
|
"logits/rejected": -2.4171223640441895, |
|
"logps/chosen": -168.79849243164062, |
|
"logps/rejected": -178.6556396484375, |
|
"loss": 0.4728, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9254748225212097, |
|
"rewards/margins": 1.5751961469650269, |
|
"rewards/rejected": -2.500671148300171, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 25.220318056395065, |
|
"learning_rate": 9.078928634333698e-07, |
|
"logits/chosen": -2.4454641342163086, |
|
"logits/rejected": -2.4170265197753906, |
|
"logps/chosen": -181.41317749023438, |
|
"logps/rejected": -199.88668823242188, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.84205561876297, |
|
"rewards/margins": 1.7803510427474976, |
|
"rewards/rejected": -2.622406482696533, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6324786324786325, |
|
"grad_norm": 29.414031929374275, |
|
"learning_rate": 9.025808064739549e-07, |
|
"logits/chosen": -2.4103400707244873, |
|
"logits/rejected": -2.370731830596924, |
|
"logps/chosen": -178.70916748046875, |
|
"logps/rejected": -193.0004119873047, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.174070119857788, |
|
"rewards/margins": 1.535239338874817, |
|
"rewards/rejected": -2.7093093395233154, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.6495726495726496, |
|
"grad_norm": 26.0372223221703, |
|
"learning_rate": 8.971363015988113e-07, |
|
"logits/chosen": -2.3428735733032227, |
|
"logits/rejected": -2.2986531257629395, |
|
"logps/chosen": -173.8651580810547, |
|
"logps/rejected": -194.9317626953125, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1425771713256836, |
|
"rewards/margins": 1.611919641494751, |
|
"rewards/rejected": -2.7544968128204346, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 33.87434178682573, |
|
"learning_rate": 8.91561140034225e-07, |
|
"logits/chosen": -2.2664923667907715, |
|
"logits/rejected": -2.2088184356689453, |
|
"logps/chosen": -172.7240753173828, |
|
"logps/rejected": -193.1275177001953, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3134868144989014, |
|
"rewards/margins": 1.5050963163375854, |
|
"rewards/rejected": -2.8185834884643555, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"grad_norm": 25.21313391058931, |
|
"learning_rate": 8.858571559921537e-07, |
|
"logits/chosen": -2.191737174987793, |
|
"logits/rejected": -2.1188113689422607, |
|
"logps/chosen": -174.46722412109375, |
|
"logps/rejected": -188.4197540283203, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.026064157485962, |
|
"rewards/margins": 1.5865710973739624, |
|
"rewards/rejected": -2.612635374069214, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"eval_logits/chosen": -2.1586899757385254, |
|
"eval_logits/rejected": -2.1090493202209473, |
|
"eval_logps/chosen": -170.4287872314453, |
|
"eval_logps/rejected": -187.2865447998047, |
|
"eval_loss": 0.45979756116867065, |
|
"eval_rewards/accuracies": 0.7596153616905212, |
|
"eval_rewards/chosen": -1.1951465606689453, |
|
"eval_rewards/margins": 1.64547860622406, |
|
"eval_rewards/rejected": -2.840625047683716, |
|
"eval_runtime": 510.4854, |
|
"eval_samples_per_second": 16.286, |
|
"eval_steps_per_second": 0.255, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 876, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2358113407598592.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|