File size: 12,532 Bytes
7ebce86 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8691588785046729,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09345794392523364,
"grad_norm": 66.9687943936917,
"learning_rate": 5e-07,
"logits/chosen": -2.720803737640381,
"logits/rejected": -2.7183666229248047,
"logps/chosen": -237.3436279296875,
"logps/rejected": -190.54464721679688,
"loss": 0.6913,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": 0.013445606455206871,
"rewards/margins": 0.008647488430142403,
"rewards/rejected": 0.00479811942204833,
"step": 5
},
{
"epoch": 0.18691588785046728,
"grad_norm": 48.6419981873445,
"learning_rate": 1e-06,
"logits/chosen": -2.679405689239502,
"logits/rejected": -2.670754909515381,
"logps/chosen": -279.81866455078125,
"logps/rejected": -226.22573852539062,
"loss": 0.6556,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": 0.4765666127204895,
"rewards/margins": 0.12125828117132187,
"rewards/rejected": 0.35530832409858704,
"step": 10
},
{
"epoch": 0.2803738317757009,
"grad_norm": 44.827387730520904,
"learning_rate": 9.972240926774166e-07,
"logits/chosen": -2.5193655490875244,
"logits/rejected": -2.510051965713501,
"logps/chosen": -236.1126708984375,
"logps/rejected": -201.1164093017578,
"loss": 0.6375,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": 1.2661212682724,
"rewards/margins": 0.5709505677223206,
"rewards/rejected": 0.6951709985733032,
"step": 15
},
{
"epoch": 0.37383177570093457,
"grad_norm": 51.33633927747967,
"learning_rate": 9.889271933555212e-07,
"logits/chosen": -2.4093480110168457,
"logits/rejected": -2.381843090057373,
"logps/chosen": -258.9214782714844,
"logps/rejected": -220.68408203125,
"loss": 0.6632,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": 1.2196061611175537,
"rewards/margins": 0.9229635000228882,
"rewards/rejected": 0.2966426610946655,
"step": 20
},
{
"epoch": 0.4672897196261682,
"grad_norm": 58.55983526769123,
"learning_rate": 9.752014277286431e-07,
"logits/chosen": -2.319462299346924,
"logits/rejected": -2.303922653198242,
"logps/chosen": -254.1189422607422,
"logps/rejected": -196.4254608154297,
"loss": 0.6806,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 1.0977602005004883,
"rewards/margins": 0.803708553314209,
"rewards/rejected": 0.2940516173839569,
"step": 25
},
{
"epoch": 0.5607476635514018,
"grad_norm": 38.96884797085844,
"learning_rate": 9.561992016100291e-07,
"logits/chosen": -2.380964994430542,
"logits/rejected": -2.357675075531006,
"logps/chosen": -242.76406860351562,
"logps/rejected": -221.53903198242188,
"loss": 0.6103,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 0.37621065974235535,
"rewards/margins": 0.7926680445671082,
"rewards/rejected": -0.4164574146270752,
"step": 30
},
{
"epoch": 0.6542056074766355,
"grad_norm": 49.029442328071866,
"learning_rate": 9.321315086741915e-07,
"logits/chosen": -2.3943734169006348,
"logits/rejected": -2.386823892593384,
"logps/chosen": -251.5531463623047,
"logps/rejected": -226.2257537841797,
"loss": 0.61,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.37054210901260376,
"rewards/margins": 0.955781102180481,
"rewards/rejected": -0.5852389931678772,
"step": 35
},
{
"epoch": 0.7476635514018691,
"grad_norm": 47.16621948348876,
"learning_rate": 9.032655876613635e-07,
"logits/chosen": -2.323800802230835,
"logits/rejected": -2.296937942504883,
"logps/chosen": -255.12149047851562,
"logps/rejected": -216.43679809570312,
"loss": 0.5697,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.7852829694747925,
"rewards/margins": 1.0229356288909912,
"rewards/rejected": -0.2376527041196823,
"step": 40
},
{
"epoch": 0.8411214953271028,
"grad_norm": 45.06757129062354,
"learning_rate": 8.699219550575952e-07,
"logits/chosen": -2.2773144245147705,
"logits/rejected": -2.2689876556396484,
"logps/chosen": -242.54403686523438,
"logps/rejected": -198.2088165283203,
"loss": 0.5913,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 1.0134313106536865,
"rewards/margins": 0.8711115121841431,
"rewards/rejected": 0.1423199325799942,
"step": 45
},
{
"epoch": 0.9345794392523364,
"grad_norm": 37.25379822397818,
"learning_rate": 8.324708461985124e-07,
"logits/chosen": -2.3120808601379395,
"logits/rejected": -2.325521945953369,
"logps/chosen": -244.45162963867188,
"logps/rejected": -233.9671173095703,
"loss": 0.5323,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.8873047828674316,
"rewards/margins": 1.0542502403259277,
"rewards/rejected": -0.16694557666778564,
"step": 50
},
{
"epoch": 0.9345794392523364,
"eval_logits/chosen": -2.3802032470703125,
"eval_logits/rejected": -2.3511736392974854,
"eval_logps/chosen": -257.5635070800781,
"eval_logps/rejected": -242.5312957763672,
"eval_loss": 0.5823682546615601,
"eval_rewards/accuracies": 0.7395833134651184,
"eval_rewards/chosen": 0.9205262660980225,
"eval_rewards/margins": 1.2713056802749634,
"eval_rewards/rejected": -0.35077938437461853,
"eval_runtime": 101.9106,
"eval_samples_per_second": 14.915,
"eval_steps_per_second": 0.236,
"step": 50
},
{
"epoch": 1.02803738317757,
"grad_norm": 17.588268217174,
"learning_rate": 7.913281043133977e-07,
"logits/chosen": -2.377732038497925,
"logits/rejected": -2.4019691944122314,
"logps/chosen": -251.0818328857422,
"logps/rejected": -231.4687957763672,
"loss": 0.4699,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.248857021331787,
"rewards/margins": 1.5204874277114868,
"rewards/rejected": -0.2716304361820221,
"step": 55
},
{
"epoch": 1.1214953271028036,
"grad_norm": 17.6740148393279,
"learning_rate": 7.469505631561317e-07,
"logits/chosen": -2.4365363121032715,
"logits/rejected": -2.3999149799346924,
"logps/chosen": -235.5603790283203,
"logps/rejected": -217.8879852294922,
"loss": 0.2443,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 1.6067253351211548,
"rewards/margins": 2.39015531539917,
"rewards/rejected": -0.7834302186965942,
"step": 60
},
{
"epoch": 1.2149532710280373,
"grad_norm": 19.25307975460043,
"learning_rate": 6.998309744925411e-07,
"logits/chosen": -2.459545612335205,
"logits/rejected": -2.443091869354248,
"logps/chosen": -234.2629852294922,
"logps/rejected": -229.77243041992188,
"loss": 0.2014,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.0133259296417236,
"rewards/margins": 3.0357606410980225,
"rewards/rejected": -1.0224347114562988,
"step": 65
},
{
"epoch": 1.308411214953271,
"grad_norm": 18.33160467378068,
"learning_rate": 6.504925367674594e-07,
"logits/chosen": -2.5056710243225098,
"logits/rejected": -2.481133222579956,
"logps/chosen": -239.59219360351562,
"logps/rejected": -222.05429077148438,
"loss": 0.2154,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.9632623195648193,
"rewards/margins": 3.0751612186431885,
"rewards/rejected": -1.1118988990783691,
"step": 70
},
{
"epoch": 1.4018691588785046,
"grad_norm": 26.100443454167603,
"learning_rate": 5.994830857031499e-07,
"logits/chosen": -2.4787604808807373,
"logits/rejected": -2.477220058441162,
"logps/chosen": -242.30496215820312,
"logps/rejected": -246.5374298095703,
"loss": 0.2167,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.2068536281585693,
"rewards/margins": 3.7736289501190186,
"rewards/rejected": -1.5667749643325806,
"step": 75
},
{
"epoch": 1.4953271028037383,
"grad_norm": 23.947057848275417,
"learning_rate": 5.473690113345342e-07,
"logits/chosen": -2.4580140113830566,
"logits/rejected": -2.4340569972991943,
"logps/chosen": -232.0405731201172,
"logps/rejected": -232.50894165039062,
"loss": 0.2133,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.6286522150039673,
"rewards/margins": 3.344123363494873,
"rewards/rejected": -1.7154712677001953,
"step": 80
},
{
"epoch": 1.588785046728972,
"grad_norm": 25.173894130852474,
"learning_rate": 4.947289690242102e-07,
"logits/chosen": -2.3979992866516113,
"logits/rejected": -2.372950553894043,
"logps/chosen": -234.0653533935547,
"logps/rejected": -226.2272491455078,
"loss": 0.2526,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 2.000070333480835,
"rewards/margins": 3.399747371673584,
"rewards/rejected": -1.3996769189834595,
"step": 85
},
{
"epoch": 1.6822429906542056,
"grad_norm": 25.381019801326666,
"learning_rate": 4.421474542878194e-07,
"logits/chosen": -2.402013063430786,
"logits/rejected": -2.358625888824463,
"logps/chosen": -240.19235229492188,
"logps/rejected": -235.06942749023438,
"loss": 0.2381,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 2.031113624572754,
"rewards/margins": 3.7032268047332764,
"rewards/rejected": -1.6721128225326538,
"step": 90
},
{
"epoch": 1.7757009345794392,
"grad_norm": 22.605364041509517,
"learning_rate": 3.902083127725186e-07,
"logits/chosen": -2.3787314891815186,
"logits/rejected": -2.382676601409912,
"logps/chosen": -231.67434692382812,
"logps/rejected": -207.5229949951172,
"loss": 0.2348,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 2.045062780380249,
"rewards/margins": 3.560044765472412,
"rewards/rejected": -1.5149818658828735,
"step": 95
},
{
"epoch": 1.8691588785046729,
"grad_norm": 26.859387828548858,
"learning_rate": 3.394882574513519e-07,
"logits/chosen": -2.3952929973602295,
"logits/rejected": -2.3831348419189453,
"logps/chosen": -234.7982940673828,
"logps/rejected": -259.3247375488281,
"loss": 0.2441,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.9878575801849365,
"rewards/margins": 3.512976884841919,
"rewards/rejected": -1.525119423866272,
"step": 100
},
{
"epoch": 1.8691588785046729,
"eval_logits/chosen": -2.3957111835479736,
"eval_logits/rejected": -2.363419532775879,
"eval_logps/chosen": -256.04901123046875,
"eval_logps/rejected": -246.68414306640625,
"eval_loss": 0.5841386318206787,
"eval_rewards/accuracies": 0.7708333134651184,
"eval_rewards/chosen": 1.071976661682129,
"eval_rewards/margins": 1.8380416631698608,
"eval_rewards/rejected": -0.7660649418830872,
"eval_runtime": 100.9496,
"eval_samples_per_second": 15.057,
"eval_steps_per_second": 0.238,
"step": 100
}
],
"logging_steps": 5,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1178822762299392.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
|