{ "best_metric": 0.8163072349117797, "best_model_checkpoint": "output/arabert-2stage/checkpoint-4000", "epoch": 0.6798096532970768, "eval_steps": 500, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001699524133242692, "grad_norm": 18.25199317932129, "learning_rate": 1.3582342954159594e-07, "loss": 4.8813, "step": 10 }, { "epoch": 0.003399048266485384, "grad_norm": 22.529903411865234, "learning_rate": 3.0560271646859084e-07, "loss": 4.555, "step": 20 }, { "epoch": 0.005098572399728076, "grad_norm": 16.15692901611328, "learning_rate": 4.7538200339558575e-07, "loss": 4.5007, "step": 30 }, { "epoch": 0.006798096532970768, "grad_norm": 15.619988441467285, "learning_rate": 6.451612903225807e-07, "loss": 3.9173, "step": 40 }, { "epoch": 0.00849762066621346, "grad_norm": 16.881023406982422, "learning_rate": 8.149405772495757e-07, "loss": 4.3681, "step": 50 }, { "epoch": 0.010197144799456152, "grad_norm": 16.038394927978516, "learning_rate": 9.67741935483871e-07, "loss": 4.2357, "step": 60 }, { "epoch": 0.011896668932698844, "grad_norm": 12.993019104003906, "learning_rate": 1.137521222410866e-06, "loss": 4.1423, "step": 70 }, { "epoch": 0.013596193065941536, "grad_norm": 15.696857452392578, "learning_rate": 1.307300509337861e-06, "loss": 3.8028, "step": 80 }, { "epoch": 0.015295717199184228, "grad_norm": 10.482597351074219, "learning_rate": 1.477079796264856e-06, "loss": 3.8624, "step": 90 }, { "epoch": 0.01699524133242692, "grad_norm": 13.150389671325684, "learning_rate": 1.6468590831918508e-06, "loss": 3.5161, "step": 100 }, { "epoch": 0.018694765465669613, "grad_norm": 11.572787284851074, "learning_rate": 1.8166383701188457e-06, "loss": 3.4791, "step": 110 }, { "epoch": 0.020394289598912305, "grad_norm": 13.110374450683594, "learning_rate": 1.9864176570458403e-06, "loss": 3.6538, "step": 120 }, { "epoch": 0.022093813732154997, "grad_norm": 10.92455768585205, "learning_rate": 2.1561969439728354e-06, "loss": 2.8184, "step": 130 }, { "epoch": 0.02379333786539769, "grad_norm": 13.158881187438965, "learning_rate": 2.3259762308998304e-06, "loss": 3.321, "step": 140 }, { "epoch": 0.02549286199864038, "grad_norm": 7.724843502044678, "learning_rate": 2.4957555178268255e-06, "loss": 2.7731, "step": 150 }, { "epoch": 0.027192386131883073, "grad_norm": 9.516618728637695, "learning_rate": 2.6655348047538205e-06, "loss": 3.0967, "step": 160 }, { "epoch": 0.028891910265125765, "grad_norm": 8.883516311645508, "learning_rate": 2.835314091680815e-06, "loss": 2.8666, "step": 170 }, { "epoch": 0.030591434398368457, "grad_norm": 7.585137844085693, "learning_rate": 3.0050933786078102e-06, "loss": 2.2543, "step": 180 }, { "epoch": 0.032290958531611146, "grad_norm": 11.794560432434082, "learning_rate": 3.174872665534805e-06, "loss": 3.1811, "step": 190 }, { "epoch": 0.03399048266485384, "grad_norm": 8.631832122802734, "learning_rate": 3.3446519524618e-06, "loss": 2.5646, "step": 200 }, { "epoch": 0.03569000679809653, "grad_norm": 7.68147611618042, "learning_rate": 3.5144312393887946e-06, "loss": 2.8453, "step": 210 }, { "epoch": 0.037389530931339225, "grad_norm": 9.345367431640625, "learning_rate": 3.6842105263157896e-06, "loss": 3.0061, "step": 220 }, { "epoch": 0.039089055064581914, "grad_norm": 10.231043815612793, "learning_rate": 3.853989813242784e-06, "loss": 2.5211, "step": 230 }, { "epoch": 0.04078857919782461, "grad_norm": 6.374863147735596, "learning_rate": 4.02376910016978e-06, "loss": 2.6467, "step": 240 }, { "epoch": 0.0424881033310673, "grad_norm": 12.607449531555176, "learning_rate": 4.193548387096774e-06, "loss": 2.1417, "step": 250 }, { "epoch": 0.04418762746430999, "grad_norm": 12.550193786621094, "learning_rate": 4.36332767402377e-06, "loss": 2.7977, "step": 260 }, { "epoch": 0.04588715159755268, "grad_norm": 9.875368118286133, "learning_rate": 4.5331069609507645e-06, "loss": 2.8345, "step": 270 }, { "epoch": 0.04758667573079538, "grad_norm": 14.792099952697754, "learning_rate": 4.702886247877759e-06, "loss": 2.8812, "step": 280 }, { "epoch": 0.049286199864038066, "grad_norm": 11.13675308227539, "learning_rate": 4.872665534804754e-06, "loss": 2.493, "step": 290 }, { "epoch": 0.05098572399728076, "grad_norm": 15.685995101928711, "learning_rate": 5.042444821731749e-06, "loss": 2.5967, "step": 300 }, { "epoch": 0.05268524813052345, "grad_norm": 8.819021224975586, "learning_rate": 5.212224108658745e-06, "loss": 2.4955, "step": 310 }, { "epoch": 0.054384772263766146, "grad_norm": 12.058821678161621, "learning_rate": 5.3820033955857386e-06, "loss": 2.2387, "step": 320 }, { "epoch": 0.056084296397008834, "grad_norm": 9.35466194152832, "learning_rate": 5.551782682512734e-06, "loss": 2.6786, "step": 330 }, { "epoch": 0.05778382053025153, "grad_norm": 9.817248344421387, "learning_rate": 5.721561969439729e-06, "loss": 2.2292, "step": 340 }, { "epoch": 0.05948334466349422, "grad_norm": 7.38469934463501, "learning_rate": 5.891341256366724e-06, "loss": 1.8386, "step": 350 }, { "epoch": 0.061182868796736914, "grad_norm": 7.639986038208008, "learning_rate": 6.061120543293718e-06, "loss": 2.5317, "step": 360 }, { "epoch": 0.0628823929299796, "grad_norm": 14.127429962158203, "learning_rate": 6.2308998302207134e-06, "loss": 2.3225, "step": 370 }, { "epoch": 0.06458191706322229, "grad_norm": 6.480667591094971, "learning_rate": 6.400679117147709e-06, "loss": 2.5349, "step": 380 }, { "epoch": 0.066281441196465, "grad_norm": 12.269805908203125, "learning_rate": 6.5704584040747036e-06, "loss": 2.5192, "step": 390 }, { "epoch": 0.06798096532970768, "grad_norm": 10.89499568939209, "learning_rate": 6.740237691001699e-06, "loss": 2.6806, "step": 400 }, { "epoch": 0.06968048946295037, "grad_norm": 6.393769264221191, "learning_rate": 6.910016977928693e-06, "loss": 2.593, "step": 410 }, { "epoch": 0.07138001359619306, "grad_norm": 10.500598907470703, "learning_rate": 7.079796264855688e-06, "loss": 1.893, "step": 420 }, { "epoch": 0.07307953772943576, "grad_norm": 9.394804954528809, "learning_rate": 7.249575551782683e-06, "loss": 2.4957, "step": 430 }, { "epoch": 0.07477906186267845, "grad_norm": 8.060555458068848, "learning_rate": 7.4193548387096784e-06, "loss": 2.2887, "step": 440 }, { "epoch": 0.07647858599592114, "grad_norm": 7.361180305480957, "learning_rate": 7.589134125636672e-06, "loss": 1.8902, "step": 450 }, { "epoch": 0.07817811012916383, "grad_norm": 9.406976699829102, "learning_rate": 7.758913412563669e-06, "loss": 2.445, "step": 460 }, { "epoch": 0.07987763426240653, "grad_norm": 6.804340362548828, "learning_rate": 7.928692699490664e-06, "loss": 2.3188, "step": 470 }, { "epoch": 0.08157715839564922, "grad_norm": 8.055757522583008, "learning_rate": 8.098471986417658e-06, "loss": 1.7857, "step": 480 }, { "epoch": 0.08327668252889191, "grad_norm": 14.029518127441406, "learning_rate": 8.268251273344653e-06, "loss": 1.9323, "step": 490 }, { "epoch": 0.0849762066621346, "grad_norm": 7.238176345825195, "learning_rate": 8.438030560271647e-06, "loss": 2.2119, "step": 500 }, { "epoch": 0.0849762066621346, "eval_cosine_accuracy@1": 0.644, "eval_cosine_accuracy@10": 0.89, "eval_cosine_accuracy@3": 0.7905, "eval_cosine_accuracy@5": 0.842, "eval_cosine_map@100": 0.7319568991974511, "eval_cosine_mrr@10": 0.727909523809523, "eval_cosine_ndcg@10": 0.7672958038269853, "eval_cosine_precision@1": 0.644, "eval_cosine_precision@10": 0.089, "eval_cosine_precision@3": 0.2635, "eval_cosine_precision@5": 0.1684, "eval_cosine_recall@1": 0.644, "eval_cosine_recall@10": 0.89, "eval_cosine_recall@3": 0.7905, "eval_cosine_recall@5": 0.842, "eval_loss": 2.020264148712158, "eval_runtime": 2.7651, "eval_samples_per_second": 272.684, "eval_sequential_score": 0.7319568991974511, "eval_steps_per_second": 2.17, "eval_sts-dev_pearson_cosine": 0.7945015650907715, "eval_sts-dev_pearson_dot": 0.7892461139806375, "eval_sts-dev_pearson_euclidean": 0.7772212795710262, "eval_sts-dev_pearson_manhattan": 0.7758198419084321, "eval_sts-dev_pearson_max": 0.7945015650907715, "eval_sts-dev_spearman_cosine": 0.8029369749021982, "eval_sts-dev_spearman_dot": 0.7981715789995407, "eval_sts-dev_spearman_euclidean": 0.7974969539844081, "eval_sts-dev_spearman_manhattan": 0.793442700340276, "eval_sts-dev_spearman_max": 0.8029369749021982, "step": 500 }, { "epoch": 0.0866757307953773, "grad_norm": 8.096364974975586, "learning_rate": 8.607809847198643e-06, "loss": 2.4202, "step": 510 }, { "epoch": 0.08837525492861999, "grad_norm": 5.928977012634277, "learning_rate": 8.777589134125636e-06, "loss": 1.9747, "step": 520 }, { "epoch": 0.09007477906186268, "grad_norm": 5.913745880126953, "learning_rate": 8.947368421052632e-06, "loss": 2.727, "step": 530 }, { "epoch": 0.09177430319510536, "grad_norm": 11.565766334533691, "learning_rate": 9.117147707979627e-06, "loss": 2.2645, "step": 540 }, { "epoch": 0.09347382732834807, "grad_norm": 5.834039688110352, "learning_rate": 9.286926994906623e-06, "loss": 2.5484, "step": 550 }, { "epoch": 0.09517335146159075, "grad_norm": 9.030844688415527, "learning_rate": 9.456706281833618e-06, "loss": 2.0806, "step": 560 }, { "epoch": 0.09687287559483344, "grad_norm": 8.642946243286133, "learning_rate": 9.626485568760612e-06, "loss": 2.4332, "step": 570 }, { "epoch": 0.09857239972807613, "grad_norm": 8.626252174377441, "learning_rate": 9.796264855687608e-06, "loss": 1.866, "step": 580 }, { "epoch": 0.10027192386131883, "grad_norm": 7.73045539855957, "learning_rate": 9.966044142614601e-06, "loss": 2.1285, "step": 590 }, { "epoch": 0.10197144799456152, "grad_norm": 8.957172393798828, "learning_rate": 9.999943676843767e-06, "loss": 2.3799, "step": 600 }, { "epoch": 0.10367097212780421, "grad_norm": 7.423664093017578, "learning_rate": 9.999714866196319e-06, "loss": 2.6725, "step": 610 }, { "epoch": 0.1053704962610469, "grad_norm": 9.436025619506836, "learning_rate": 9.999310055908776e-06, "loss": 2.4767, "step": 620 }, { "epoch": 0.1070700203942896, "grad_norm": 8.621382713317871, "learning_rate": 9.998729260231243e-06, "loss": 1.9994, "step": 630 }, { "epoch": 0.10876954452753229, "grad_norm": 8.81654167175293, "learning_rate": 9.997972499608852e-06, "loss": 2.6699, "step": 640 }, { "epoch": 0.11046906866077498, "grad_norm": 6.4313154220581055, "learning_rate": 9.997039800681044e-06, "loss": 2.2264, "step": 650 }, { "epoch": 0.11216859279401767, "grad_norm": 8.586142539978027, "learning_rate": 9.995931196280622e-06, "loss": 2.0675, "step": 660 }, { "epoch": 0.11386811692726037, "grad_norm": 6.31871223449707, "learning_rate": 9.994646725432611e-06, "loss": 1.7862, "step": 670 }, { "epoch": 0.11556764106050306, "grad_norm": 9.907938957214355, "learning_rate": 9.993186433352867e-06, "loss": 2.5078, "step": 680 }, { "epoch": 0.11726716519374575, "grad_norm": 5.77314567565918, "learning_rate": 9.9915503714465e-06, "loss": 2.135, "step": 690 }, { "epoch": 0.11896668932698844, "grad_norm": 15.68774700164795, "learning_rate": 9.989738597306053e-06, "loss": 2.5356, "step": 700 }, { "epoch": 0.12066621346023114, "grad_norm": 13.062637329101562, "learning_rate": 9.987751174709489e-06, "loss": 2.4131, "step": 710 }, { "epoch": 0.12236573759347383, "grad_norm": 6.619427680969238, "learning_rate": 9.98558817361792e-06, "loss": 2.0465, "step": 720 }, { "epoch": 0.12406526172671652, "grad_norm": 10.573486328125, "learning_rate": 9.98324967017318e-06, "loss": 2.5195, "step": 730 }, { "epoch": 0.1257647858599592, "grad_norm": 10.98731803894043, "learning_rate": 9.98073574669511e-06, "loss": 2.3977, "step": 740 }, { "epoch": 0.1274643099932019, "grad_norm": 7.568258285522461, "learning_rate": 9.978046491678686e-06, "loss": 1.8389, "step": 750 }, { "epoch": 0.12916383412644458, "grad_norm": 6.886653423309326, "learning_rate": 9.975181999790888e-06, "loss": 2.1653, "step": 760 }, { "epoch": 0.13086335825968728, "grad_norm": 8.51340103149414, "learning_rate": 9.972142371867375e-06, "loss": 2.3171, "step": 770 }, { "epoch": 0.13256288239293, "grad_norm": 13.55966567993164, "learning_rate": 9.968927714908934e-06, "loss": 2.3126, "step": 780 }, { "epoch": 0.13426240652617266, "grad_norm": 6.183629035949707, "learning_rate": 9.965538142077709e-06, "loss": 1.8593, "step": 790 }, { "epoch": 0.13596193065941536, "grad_norm": 8.71070671081543, "learning_rate": 9.961973772693226e-06, "loss": 2.3248, "step": 800 }, { "epoch": 0.13766145479265807, "grad_norm": 6.578171253204346, "learning_rate": 9.958234732228184e-06, "loss": 1.4786, "step": 810 }, { "epoch": 0.13936097892590074, "grad_norm": 8.017528533935547, "learning_rate": 9.954321152304049e-06, "loss": 2.5924, "step": 820 }, { "epoch": 0.14106050305914344, "grad_norm": 9.630891799926758, "learning_rate": 9.950233170686404e-06, "loss": 2.7052, "step": 830 }, { "epoch": 0.14276002719238612, "grad_norm": 9.43925952911377, "learning_rate": 9.945970931280117e-06, "loss": 2.9104, "step": 840 }, { "epoch": 0.14445955132562882, "grad_norm": 7.2036967277526855, "learning_rate": 9.941534584124262e-06, "loss": 1.905, "step": 850 }, { "epoch": 0.14615907545887152, "grad_norm": 8.345267295837402, "learning_rate": 9.936924285386849e-06, "loss": 1.4475, "step": 860 }, { "epoch": 0.1478585995921142, "grad_norm": 6.2061614990234375, "learning_rate": 9.932140197359312e-06, "loss": 1.9715, "step": 870 }, { "epoch": 0.1495581237253569, "grad_norm": 8.147466659545898, "learning_rate": 9.927182488450813e-06, "loss": 1.922, "step": 880 }, { "epoch": 0.1512576478585996, "grad_norm": 12.269794464111328, "learning_rate": 9.9220513331823e-06, "loss": 2.5347, "step": 890 }, { "epoch": 0.15295717199184228, "grad_norm": 4.720547199249268, "learning_rate": 9.916746912180369e-06, "loss": 1.9091, "step": 900 }, { "epoch": 0.15465669612508498, "grad_norm": 10.795013427734375, "learning_rate": 9.911269412170906e-06, "loss": 2.127, "step": 910 }, { "epoch": 0.15635622025832766, "grad_norm": 9.331660270690918, "learning_rate": 9.905619025972513e-06, "loss": 2.0569, "step": 920 }, { "epoch": 0.15805574439157036, "grad_norm": 8.150259017944336, "learning_rate": 9.89979595248972e-06, "loss": 1.5199, "step": 930 }, { "epoch": 0.15975526852481306, "grad_norm": 5.694140434265137, "learning_rate": 9.893800396705985e-06, "loss": 2.1115, "step": 940 }, { "epoch": 0.16145479265805573, "grad_norm": 6.206644535064697, "learning_rate": 9.887632569676475e-06, "loss": 2.3499, "step": 950 }, { "epoch": 0.16315431679129844, "grad_norm": 4.6663408279418945, "learning_rate": 9.881292688520638e-06, "loss": 2.2428, "step": 960 }, { "epoch": 0.16485384092454114, "grad_norm": 7.22851037979126, "learning_rate": 9.874780976414565e-06, "loss": 2.3808, "step": 970 }, { "epoch": 0.16655336505778381, "grad_norm": 5.55798864364624, "learning_rate": 9.86809766258312e-06, "loss": 2.0025, "step": 980 }, { "epoch": 0.16825288919102652, "grad_norm": 5.861523628234863, "learning_rate": 9.86124298229189e-06, "loss": 2.0427, "step": 990 }, { "epoch": 0.1699524133242692, "grad_norm": 6.302640438079834, "learning_rate": 9.854217176838886e-06, "loss": 1.8858, "step": 1000 }, { "epoch": 0.1699524133242692, "eval_cosine_accuracy@1": 0.682, "eval_cosine_accuracy@10": 0.906, "eval_cosine_accuracy@3": 0.8185, "eval_cosine_accuracy@5": 0.858, "eval_cosine_map@100": 0.762636433194321, "eval_cosine_mrr@10": 0.7592498015873005, "eval_cosine_ndcg@10": 0.7948932488118442, "eval_cosine_precision@1": 0.682, "eval_cosine_precision@10": 0.09060000000000001, "eval_cosine_precision@3": 0.2728333333333333, "eval_cosine_precision@5": 0.1716, "eval_cosine_recall@1": 0.682, "eval_cosine_recall@10": 0.906, "eval_cosine_recall@3": 0.8185, "eval_cosine_recall@5": 0.858, "eval_loss": 1.8376002311706543, "eval_runtime": 2.7558, "eval_samples_per_second": 273.604, "eval_sequential_score": 0.762636433194321, "eval_steps_per_second": 2.177, "eval_sts-dev_pearson_cosine": 0.7985157354895591, "eval_sts-dev_pearson_dot": 0.7942162925984169, "eval_sts-dev_pearson_euclidean": 0.7855985286403331, "eval_sts-dev_pearson_manhattan": 0.7854261466194248, "eval_sts-dev_pearson_max": 0.7985157354895591, "eval_sts-dev_spearman_cosine": 0.8055793147203388, "eval_sts-dev_spearman_dot": 0.8010661505883895, "eval_sts-dev_spearman_euclidean": 0.8034068438279502, "eval_sts-dev_spearman_manhattan": 0.8018568915859646, "eval_sts-dev_spearman_max": 0.8055793147203388, "step": 1000 }, { "epoch": 0.1716519374575119, "grad_norm": 7.927140235900879, "learning_rate": 9.847020493546058e-06, "loss": 1.9554, "step": 1010 }, { "epoch": 0.1733514615907546, "grad_norm": 8.299236297607422, "learning_rate": 9.839653185750588e-06, "loss": 2.2341, "step": 1020 }, { "epoch": 0.17505098572399727, "grad_norm": 8.708532333374023, "learning_rate": 9.83211551279597e-06, "loss": 2.4421, "step": 1030 }, { "epoch": 0.17675050985723997, "grad_norm": 9.657612800598145, "learning_rate": 9.824407740022878e-06, "loss": 1.8227, "step": 1040 }, { "epoch": 0.17845003399048268, "grad_norm": 6.149839401245117, "learning_rate": 9.816530138759837e-06, "loss": 2.1157, "step": 1050 }, { "epoch": 0.18014955812372535, "grad_norm": 5.039497375488281, "learning_rate": 9.808482986313653e-06, "loss": 1.7899, "step": 1060 }, { "epoch": 0.18184908225696805, "grad_norm": 8.380406379699707, "learning_rate": 9.80026656595967e-06, "loss": 1.8153, "step": 1070 }, { "epoch": 0.18354860639021073, "grad_norm": 5.239027500152588, "learning_rate": 9.791881166931788e-06, "loss": 2.0095, "step": 1080 }, { "epoch": 0.18524813052345343, "grad_norm": 6.085160732269287, "learning_rate": 9.783327084412277e-06, "loss": 1.5087, "step": 1090 }, { "epoch": 0.18694765465669613, "grad_norm": 5.803791522979736, "learning_rate": 9.774604619521404e-06, "loss": 1.9106, "step": 1100 }, { "epoch": 0.1886471787899388, "grad_norm": 5.130899906158447, "learning_rate": 9.765714079306814e-06, "loss": 2.0189, "step": 1110 }, { "epoch": 0.1903467029231815, "grad_norm": 13.715828895568848, "learning_rate": 9.756655776732727e-06, "loss": 2.3282, "step": 1120 }, { "epoch": 0.1920462270564242, "grad_norm": 9.30579662322998, "learning_rate": 9.74743003066893e-06, "loss": 2.147, "step": 1130 }, { "epoch": 0.1937457511896669, "grad_norm": 6.4321675300598145, "learning_rate": 9.738037165879538e-06, "loss": 1.9017, "step": 1140 }, { "epoch": 0.1954452753229096, "grad_norm": 5.368739128112793, "learning_rate": 9.72847751301157e-06, "loss": 2.102, "step": 1150 }, { "epoch": 0.19714479945615226, "grad_norm": 5.458316802978516, "learning_rate": 9.718751408583312e-06, "loss": 2.1275, "step": 1160 }, { "epoch": 0.19884432358939497, "grad_norm": 8.618191719055176, "learning_rate": 9.708859194972462e-06, "loss": 2.6718, "step": 1170 }, { "epoch": 0.20054384772263767, "grad_norm": 7.549973487854004, "learning_rate": 9.698801220404086e-06, "loss": 1.8549, "step": 1180 }, { "epoch": 0.20224337185588034, "grad_norm": 5.044092655181885, "learning_rate": 9.688577838938358e-06, "loss": 2.1802, "step": 1190 }, { "epoch": 0.20394289598912305, "grad_norm": 5.195682048797607, "learning_rate": 9.67818941045809e-06, "loss": 2.2905, "step": 1200 }, { "epoch": 0.20564242012236575, "grad_norm": 11.789019584655762, "learning_rate": 9.667636300656073e-06, "loss": 2.283, "step": 1210 }, { "epoch": 0.20734194425560842, "grad_norm": 7.097940921783447, "learning_rate": 9.656918881022196e-06, "loss": 2.1976, "step": 1220 }, { "epoch": 0.20904146838885113, "grad_norm": 10.327863693237305, "learning_rate": 9.646037528830374e-06, "loss": 2.5894, "step": 1230 }, { "epoch": 0.2107409925220938, "grad_norm": 6.603916645050049, "learning_rate": 9.634992627125264e-06, "loss": 1.7659, "step": 1240 }, { "epoch": 0.2124405166553365, "grad_norm": 4.55158805847168, "learning_rate": 9.623784564708782e-06, "loss": 1.9581, "step": 1250 }, { "epoch": 0.2141400407885792, "grad_norm": 6.308363437652588, "learning_rate": 9.612413736126421e-06, "loss": 2.1419, "step": 1260 }, { "epoch": 0.21583956492182188, "grad_norm": 5.769819259643555, "learning_rate": 9.600880541653352e-06, "loss": 1.7988, "step": 1270 }, { "epoch": 0.21753908905506458, "grad_norm": 9.009718894958496, "learning_rate": 9.589185387280343e-06, "loss": 2.3537, "step": 1280 }, { "epoch": 0.21923861318830729, "grad_norm": 10.911920547485352, "learning_rate": 9.577328684699468e-06, "loss": 2.2776, "step": 1290 }, { "epoch": 0.22093813732154996, "grad_norm": 7.810164928436279, "learning_rate": 9.565310851289602e-06, "loss": 2.0633, "step": 1300 }, { "epoch": 0.22263766145479266, "grad_norm": 7.256144046783447, "learning_rate": 9.553132310101741e-06, "loss": 2.247, "step": 1310 }, { "epoch": 0.22433718558803534, "grad_norm": 8.476386070251465, "learning_rate": 9.540793489844106e-06, "loss": 1.6592, "step": 1320 }, { "epoch": 0.22603670972127804, "grad_norm": 7.557313919067383, "learning_rate": 9.528294824867054e-06, "loss": 1.8965, "step": 1330 }, { "epoch": 0.22773623385452074, "grad_norm": 5.973213195800781, "learning_rate": 9.515636755147781e-06, "loss": 2.3415, "step": 1340 }, { "epoch": 0.22943575798776342, "grad_norm": 6.3328471183776855, "learning_rate": 9.50281972627484e-06, "loss": 1.7645, "step": 1350 }, { "epoch": 0.23113528212100612, "grad_norm": 12.455780982971191, "learning_rate": 9.489844189432456e-06, "loss": 1.9076, "step": 1360 }, { "epoch": 0.23283480625424882, "grad_norm": 6.662944316864014, "learning_rate": 9.476710601384639e-06, "loss": 2.0724, "step": 1370 }, { "epoch": 0.2345343303874915, "grad_norm": 8.458195686340332, "learning_rate": 9.463419424459108e-06, "loss": 2.0766, "step": 1380 }, { "epoch": 0.2362338545207342, "grad_norm": 5.073389530181885, "learning_rate": 9.449971126531015e-06, "loss": 1.6035, "step": 1390 }, { "epoch": 0.23793337865397687, "grad_norm": 9.805305480957031, "learning_rate": 9.436366181006476e-06, "loss": 2.1389, "step": 1400 }, { "epoch": 0.23963290278721958, "grad_norm": 5.322582244873047, "learning_rate": 9.422605066805906e-06, "loss": 1.8332, "step": 1410 }, { "epoch": 0.24133242692046228, "grad_norm": 6.252927780151367, "learning_rate": 9.408688268347157e-06, "loss": 1.8316, "step": 1420 }, { "epoch": 0.24303195105370495, "grad_norm": 6.532680034637451, "learning_rate": 9.394616275528475e-06, "loss": 1.754, "step": 1430 }, { "epoch": 0.24473147518694766, "grad_norm": 10.015703201293945, "learning_rate": 9.380389583711238e-06, "loss": 2.0633, "step": 1440 }, { "epoch": 0.24643099932019036, "grad_norm": 4.93398904800415, "learning_rate": 9.366008693702535e-06, "loss": 1.9549, "step": 1450 }, { "epoch": 0.24813052345343303, "grad_norm": 10.338335037231445, "learning_rate": 9.351474111737531e-06, "loss": 2.1118, "step": 1460 }, { "epoch": 0.24983004758667574, "grad_norm": 6.729907512664795, "learning_rate": 9.336786349461637e-06, "loss": 2.1022, "step": 1470 }, { "epoch": 0.2515295717199184, "grad_norm": 6.920368194580078, "learning_rate": 9.321945923912518e-06, "loss": 1.7638, "step": 1480 }, { "epoch": 0.2532290958531611, "grad_norm": 8.145246505737305, "learning_rate": 9.306953357501874e-06, "loss": 2.3552, "step": 1490 }, { "epoch": 0.2549286199864038, "grad_norm": 7.566394805908203, "learning_rate": 9.291809177997061e-06, "loss": 2.3227, "step": 1500 }, { "epoch": 0.2549286199864038, "eval_cosine_accuracy@1": 0.6785, "eval_cosine_accuracy@10": 0.909, "eval_cosine_accuracy@3": 0.8225, "eval_cosine_accuracy@5": 0.863, "eval_cosine_map@100": 0.7639697235001636, "eval_cosine_mrr@10": 0.7603734126984125, "eval_cosine_ndcg@10": 0.7966821763190639, "eval_cosine_precision@1": 0.6785, "eval_cosine_precision@10": 0.09090000000000001, "eval_cosine_precision@3": 0.2741666666666666, "eval_cosine_precision@5": 0.17260000000000003, "eval_cosine_recall@1": 0.6785, "eval_cosine_recall@10": 0.909, "eval_cosine_recall@3": 0.8225, "eval_cosine_recall@5": 0.863, "eval_loss": 1.7280288934707642, "eval_runtime": 2.966, "eval_samples_per_second": 254.217, "eval_sequential_score": 0.7639697235001636, "eval_steps_per_second": 2.023, "eval_sts-dev_pearson_cosine": 0.7956596138775444, "eval_sts-dev_pearson_dot": 0.7898276993726723, "eval_sts-dev_pearson_euclidean": 0.7849266959832228, "eval_sts-dev_pearson_manhattan": 0.7850471763202945, "eval_sts-dev_pearson_max": 0.7956596138775444, "eval_sts-dev_spearman_cosine": 0.8035475738008022, "eval_sts-dev_spearman_dot": 0.7954777202652852, "eval_sts-dev_spearman_euclidean": 0.7982728913757071, "eval_sts-dev_spearman_manhattan": 0.8004630496417331, "eval_sts-dev_spearman_max": 0.8035475738008022, "step": 1500 }, { "epoch": 0.2566281441196465, "grad_norm": 8.947423934936523, "learning_rate": 9.276513918502508e-06, "loss": 2.3639, "step": 1510 }, { "epoch": 0.25832766825288916, "grad_norm": 7.069028377532959, "learning_rate": 9.261068117440955e-06, "loss": 1.717, "step": 1520 }, { "epoch": 0.26002719238613187, "grad_norm": 9.209027290344238, "learning_rate": 9.24547231853449e-06, "loss": 2.0703, "step": 1530 }, { "epoch": 0.26172671651937457, "grad_norm": 6.722319602966309, "learning_rate": 9.229727070785423e-06, "loss": 1.9918, "step": 1540 }, { "epoch": 0.26342624065261727, "grad_norm": 7.064426422119141, "learning_rate": 9.21383292845695e-06, "loss": 2.0636, "step": 1550 }, { "epoch": 0.26512576478586, "grad_norm": 5.03215217590332, "learning_rate": 9.197790451053641e-06, "loss": 1.7062, "step": 1560 }, { "epoch": 0.2668252889191027, "grad_norm": 8.875913619995117, "learning_rate": 9.18160020330175e-06, "loss": 2.2969, "step": 1570 }, { "epoch": 0.2685248130523453, "grad_norm": 8.441971778869629, "learning_rate": 9.165262755129337e-06, "loss": 1.9633, "step": 1580 }, { "epoch": 0.270224337185588, "grad_norm": 6.123201847076416, "learning_rate": 9.148778681646196e-06, "loss": 1.8068, "step": 1590 }, { "epoch": 0.27192386131883073, "grad_norm": 4.153637886047363, "learning_rate": 9.132148563123617e-06, "loss": 1.6155, "step": 1600 }, { "epoch": 0.27362338545207343, "grad_norm": 5.999012470245361, "learning_rate": 9.115372984973963e-06, "loss": 1.6978, "step": 1610 }, { "epoch": 0.27532290958531613, "grad_norm": 7.118943214416504, "learning_rate": 9.098452537730049e-06, "loss": 2.1703, "step": 1620 }, { "epoch": 0.2770224337185588, "grad_norm": 7.293047904968262, "learning_rate": 9.081387817024371e-06, "loss": 2.5395, "step": 1630 }, { "epoch": 0.2787219578518015, "grad_norm": 4.569684982299805, "learning_rate": 9.064179423568122e-06, "loss": 1.7744, "step": 1640 }, { "epoch": 0.2804214819850442, "grad_norm": 5.9083404541015625, "learning_rate": 9.046827963130063e-06, "loss": 1.7637, "step": 1650 }, { "epoch": 0.2821210061182869, "grad_norm": 7.332223892211914, "learning_rate": 9.02933404651518e-06, "loss": 1.9622, "step": 1660 }, { "epoch": 0.2838205302515296, "grad_norm": 6.511028289794922, "learning_rate": 9.011698289543199e-06, "loss": 1.3597, "step": 1670 }, { "epoch": 0.28552005438477224, "grad_norm": 6.4721832275390625, "learning_rate": 8.9939213130269e-06, "loss": 1.6605, "step": 1680 }, { "epoch": 0.28721957851801494, "grad_norm": 7.247612953186035, "learning_rate": 8.97600374275026e-06, "loss": 2.166, "step": 1690 }, { "epoch": 0.28891910265125764, "grad_norm": 10.479696273803711, "learning_rate": 8.957946209446435e-06, "loss": 1.959, "step": 1700 }, { "epoch": 0.29061862678450034, "grad_norm": 6.533411026000977, "learning_rate": 8.939749348775544e-06, "loss": 2.0912, "step": 1710 }, { "epoch": 0.29231815091774305, "grad_norm": 8.847211837768555, "learning_rate": 8.921413801302305e-06, "loss": 2.0446, "step": 1720 }, { "epoch": 0.29401767505098575, "grad_norm": 6.280109882354736, "learning_rate": 8.902940212473477e-06, "loss": 2.1172, "step": 1730 }, { "epoch": 0.2957171991842284, "grad_norm": 6.145227909088135, "learning_rate": 8.884329232595139e-06, "loss": 1.8379, "step": 1740 }, { "epoch": 0.2974167233174711, "grad_norm": 6.113027095794678, "learning_rate": 8.865581516809806e-06, "loss": 2.0789, "step": 1750 }, { "epoch": 0.2991162474507138, "grad_norm": 8.188186645507812, "learning_rate": 8.846697725073359e-06, "loss": 1.8014, "step": 1760 }, { "epoch": 0.3008157715839565, "grad_norm": 7.898641109466553, "learning_rate": 8.827678522131815e-06, "loss": 1.7158, "step": 1770 }, { "epoch": 0.3025152957171992, "grad_norm": 8.399277687072754, "learning_rate": 8.80852457749793e-06, "loss": 1.785, "step": 1780 }, { "epoch": 0.30421481985044185, "grad_norm": 4.910240650177002, "learning_rate": 8.789236565427627e-06, "loss": 1.9891, "step": 1790 }, { "epoch": 0.30591434398368456, "grad_norm": 10.41693115234375, "learning_rate": 8.769815164896262e-06, "loss": 2.134, "step": 1800 }, { "epoch": 0.30761386811692726, "grad_norm": 8.666768074035645, "learning_rate": 8.750261059574722e-06, "loss": 1.8226, "step": 1810 }, { "epoch": 0.30931339225016996, "grad_norm": 4.843673229217529, "learning_rate": 8.730574937805363e-06, "loss": 1.9502, "step": 1820 }, { "epoch": 0.31101291638341266, "grad_norm": 6.370514392852783, "learning_rate": 8.71075749257777e-06, "loss": 2.0422, "step": 1830 }, { "epoch": 0.3127124405166553, "grad_norm": 4.965259552001953, "learning_rate": 8.690809421504375e-06, "loss": 1.7759, "step": 1840 }, { "epoch": 0.314411964649898, "grad_norm": 6.794595241546631, "learning_rate": 8.670731426795888e-06, "loss": 2.0413, "step": 1850 }, { "epoch": 0.3161114887831407, "grad_norm": 4.695109844207764, "learning_rate": 8.65052421523658e-06, "loss": 1.9104, "step": 1860 }, { "epoch": 0.3178110129163834, "grad_norm": 6.058196544647217, "learning_rate": 8.63018849815941e-06, "loss": 2.3626, "step": 1870 }, { "epoch": 0.3195105370496261, "grad_norm": 4.891868591308594, "learning_rate": 8.609724991420981e-06, "loss": 1.8832, "step": 1880 }, { "epoch": 0.3212100611828688, "grad_norm": 5.250426292419434, "learning_rate": 8.589134415376333e-06, "loss": 2.0618, "step": 1890 }, { "epoch": 0.32290958531611147, "grad_norm": 4.848729133605957, "learning_rate": 8.568417494853598e-06, "loss": 2.0059, "step": 1900 }, { "epoch": 0.32460910944935417, "grad_norm": 8.6367769241333, "learning_rate": 8.547574959128472e-06, "loss": 1.9333, "step": 1910 }, { "epoch": 0.3263086335825969, "grad_norm": 5.955835342407227, "learning_rate": 8.526607541898555e-06, "loss": 2.3307, "step": 1920 }, { "epoch": 0.3280081577158396, "grad_norm": 7.700503349304199, "learning_rate": 8.505515981257515e-06, "loss": 2.3562, "step": 1930 }, { "epoch": 0.3297076818490823, "grad_norm": 6.47861909866333, "learning_rate": 8.48430101966911e-06, "loss": 1.7028, "step": 1940 }, { "epoch": 0.3314072059823249, "grad_norm": 6.167905807495117, "learning_rate": 8.462963403941046e-06, "loss": 1.6633, "step": 1950 }, { "epoch": 0.33310673011556763, "grad_norm": 5.260265827178955, "learning_rate": 8.441503885198699e-06, "loss": 1.6875, "step": 1960 }, { "epoch": 0.33480625424881033, "grad_norm": 5.841998100280762, "learning_rate": 8.419923218858658e-06, "loss": 1.9344, "step": 1970 }, { "epoch": 0.33650577838205303, "grad_norm": 9.941591262817383, "learning_rate": 8.398222164602151e-06, "loss": 2.1813, "step": 1980 }, { "epoch": 0.33820530251529574, "grad_norm": 8.419736862182617, "learning_rate": 8.376401486348286e-06, "loss": 1.9077, "step": 1990 }, { "epoch": 0.3399048266485384, "grad_norm": 6.201753616333008, "learning_rate": 8.354461952227175e-06, "loss": 1.9249, "step": 2000 }, { "epoch": 0.3399048266485384, "eval_cosine_accuracy@1": 0.695, "eval_cosine_accuracy@10": 0.9095, "eval_cosine_accuracy@3": 0.831, "eval_cosine_accuracy@5": 0.871, "eval_cosine_map@100": 0.7742690226366353, "eval_cosine_mrr@10": 0.7707448412698402, "eval_cosine_ndcg@10": 0.8045879491595074, "eval_cosine_precision@1": 0.695, "eval_cosine_precision@10": 0.09095000000000002, "eval_cosine_precision@3": 0.277, "eval_cosine_precision@5": 0.17420000000000002, "eval_cosine_recall@1": 0.695, "eval_cosine_recall@10": 0.9095, "eval_cosine_recall@3": 0.831, "eval_cosine_recall@5": 0.871, "eval_loss": 1.7007904052734375, "eval_runtime": 2.7948, "eval_samples_per_second": 269.79, "eval_sequential_score": 0.7742690226366353, "eval_steps_per_second": 2.147, "eval_sts-dev_pearson_cosine": 0.7952822056751716, "eval_sts-dev_pearson_dot": 0.7915034361455775, "eval_sts-dev_pearson_euclidean": 0.7874083556892966, "eval_sts-dev_pearson_manhattan": 0.7873169677574228, "eval_sts-dev_pearson_max": 0.7952822056751716, "eval_sts-dev_spearman_cosine": 0.8004780454585122, "eval_sts-dev_spearman_dot": 0.7972326119671326, "eval_sts-dev_spearman_euclidean": 0.7979533651258709, "eval_sts-dev_spearman_manhattan": 0.7990561344213346, "eval_sts-dev_spearman_max": 0.8004780454585122, "step": 2000 }, { "epoch": 0.3416043507817811, "grad_norm": 5.67038106918335, "learning_rate": 8.332404334552882e-06, "loss": 1.7941, "step": 2010 }, { "epoch": 0.3433038749150238, "grad_norm": 5.878876686096191, "learning_rate": 8.310229409796235e-06, "loss": 2.0737, "step": 2020 }, { "epoch": 0.3450033990482665, "grad_norm": 5.597275733947754, "learning_rate": 8.287937958557513e-06, "loss": 1.559, "step": 2030 }, { "epoch": 0.3467029231815092, "grad_norm": 6.382761478424072, "learning_rate": 8.265530765538938e-06, "loss": 1.3173, "step": 2040 }, { "epoch": 0.3484024473147519, "grad_norm": 7.027074813842773, "learning_rate": 8.243008619517074e-06, "loss": 2.1037, "step": 2050 }, { "epoch": 0.35010197144799454, "grad_norm": 6.432947158813477, "learning_rate": 8.220372313315053e-06, "loss": 2.27, "step": 2060 }, { "epoch": 0.35180149558123724, "grad_norm": 6.3885416984558105, "learning_rate": 8.197622643774663e-06, "loss": 1.7745, "step": 2070 }, { "epoch": 0.35350101971447995, "grad_norm": 6.885595798492432, "learning_rate": 8.174760411728308e-06, "loss": 1.3386, "step": 2080 }, { "epoch": 0.35520054384772265, "grad_norm": 7.397289752960205, "learning_rate": 8.1517864219708e-06, "loss": 1.9689, "step": 2090 }, { "epoch": 0.35690006798096535, "grad_norm": 4.5459885597229, "learning_rate": 8.128701483231048e-06, "loss": 1.699, "step": 2100 }, { "epoch": 0.358599592114208, "grad_norm": 4.440462589263916, "learning_rate": 8.105506408143571e-06, "loss": 1.8324, "step": 2110 }, { "epoch": 0.3602991162474507, "grad_norm": 5.693051338195801, "learning_rate": 8.082202013219912e-06, "loss": 1.8712, "step": 2120 }, { "epoch": 0.3619986403806934, "grad_norm": 7.692087650299072, "learning_rate": 8.058789118819872e-06, "loss": 2.4458, "step": 2130 }, { "epoch": 0.3636981645139361, "grad_norm": 5.480040073394775, "learning_rate": 8.03526854912265e-06, "loss": 1.7687, "step": 2140 }, { "epoch": 0.3653976886471788, "grad_norm": 7.727152347564697, "learning_rate": 8.011641132097822e-06, "loss": 1.9936, "step": 2150 }, { "epoch": 0.36709721278042146, "grad_norm": 5.268275260925293, "learning_rate": 7.987907699476202e-06, "loss": 1.7563, "step": 2160 }, { "epoch": 0.36879673691366416, "grad_norm": 12.53956413269043, "learning_rate": 7.96406908672055e-06, "loss": 2.0094, "step": 2170 }, { "epoch": 0.37049626104690686, "grad_norm": 6.193202495574951, "learning_rate": 7.940126132996172e-06, "loss": 2.0172, "step": 2180 }, { "epoch": 0.37219578518014956, "grad_norm": 8.85962200164795, "learning_rate": 7.916079681141385e-06, "loss": 1.9722, "step": 2190 }, { "epoch": 0.37389530931339227, "grad_norm": 8.260863304138184, "learning_rate": 7.891930577637832e-06, "loss": 1.8102, "step": 2200 }, { "epoch": 0.37559483344663497, "grad_norm": 7.650066375732422, "learning_rate": 7.867679672580696e-06, "loss": 2.1561, "step": 2210 }, { "epoch": 0.3772943575798776, "grad_norm": 9.542856216430664, "learning_rate": 7.843327819648774e-06, "loss": 2.1181, "step": 2220 }, { "epoch": 0.3789938817131203, "grad_norm": 9.622944831848145, "learning_rate": 7.818875876074421e-06, "loss": 1.9387, "step": 2230 }, { "epoch": 0.380693405846363, "grad_norm": 5.241047382354736, "learning_rate": 7.794324702613376e-06, "loss": 1.7689, "step": 2240 }, { "epoch": 0.3823929299796057, "grad_norm": 10.481565475463867, "learning_rate": 7.769675163514463e-06, "loss": 1.9585, "step": 2250 }, { "epoch": 0.3840924541128484, "grad_norm": 8.667862892150879, "learning_rate": 7.744928126489169e-06, "loss": 1.5681, "step": 2260 }, { "epoch": 0.38579197824609107, "grad_norm": 5.036750793457031, "learning_rate": 7.720084462681092e-06, "loss": 2.0402, "step": 2270 }, { "epoch": 0.3874915023793338, "grad_norm": 5.431954383850098, "learning_rate": 7.695145046635281e-06, "loss": 1.78, "step": 2280 }, { "epoch": 0.3891910265125765, "grad_norm": 7.302848815917969, "learning_rate": 7.670110756267455e-06, "loss": 1.9524, "step": 2290 }, { "epoch": 0.3908905506458192, "grad_norm": 8.023348808288574, "learning_rate": 7.644982472833083e-06, "loss": 1.9678, "step": 2300 }, { "epoch": 0.3925900747790619, "grad_norm": 7.136281967163086, "learning_rate": 7.61976108089638e-06, "loss": 1.6553, "step": 2310 }, { "epoch": 0.39428959891230453, "grad_norm": 8.916802406311035, "learning_rate": 7.594447468299156e-06, "loss": 1.8378, "step": 2320 }, { "epoch": 0.39598912304554723, "grad_norm": 8.570812225341797, "learning_rate": 7.56904252612957e-06, "loss": 1.8066, "step": 2330 }, { "epoch": 0.39768864717878993, "grad_norm": 10.376533508300781, "learning_rate": 7.543547148690757e-06, "loss": 1.8585, "step": 2340 }, { "epoch": 0.39938817131203264, "grad_norm": 5.894679546356201, "learning_rate": 7.517962233469345e-06, "loss": 1.4762, "step": 2350 }, { "epoch": 0.40108769544527534, "grad_norm": 6.78580904006958, "learning_rate": 7.492288681103872e-06, "loss": 1.6828, "step": 2360 }, { "epoch": 0.40278721957851804, "grad_norm": 6.131538391113281, "learning_rate": 7.4665273953530695e-06, "loss": 1.6288, "step": 2370 }, { "epoch": 0.4044867437117607, "grad_norm": 5.721993923187256, "learning_rate": 7.440679283064059e-06, "loss": 1.7679, "step": 2380 }, { "epoch": 0.4061862678450034, "grad_norm": 6.973949909210205, "learning_rate": 7.41474525414042e-06, "loss": 2.128, "step": 2390 }, { "epoch": 0.4078857919782461, "grad_norm": 6.6178202629089355, "learning_rate": 7.388726221510163e-06, "loss": 1.3543, "step": 2400 }, { "epoch": 0.4095853161114888, "grad_norm": 5.370213985443115, "learning_rate": 7.3626231010935974e-06, "loss": 1.806, "step": 2410 }, { "epoch": 0.4112848402447315, "grad_norm": 8.617044448852539, "learning_rate": 7.336436811771085e-06, "loss": 1.8597, "step": 2420 }, { "epoch": 0.41298436437797414, "grad_norm": 12.804397583007812, "learning_rate": 7.310168275350692e-06, "loss": 2.2319, "step": 2430 }, { "epoch": 0.41468388851121685, "grad_norm": 8.611510276794434, "learning_rate": 7.28381841653574e-06, "loss": 1.9433, "step": 2440 }, { "epoch": 0.41638341264445955, "grad_norm": 5.746379375457764, "learning_rate": 7.257388162892261e-06, "loss": 1.7124, "step": 2450 }, { "epoch": 0.41808293677770225, "grad_norm": 9.15091609954834, "learning_rate": 7.23087844481634e-06, "loss": 1.702, "step": 2460 }, { "epoch": 0.41978246091094495, "grad_norm": 5.277801513671875, "learning_rate": 7.204290195501358e-06, "loss": 1.5501, "step": 2470 }, { "epoch": 0.4214819850441876, "grad_norm": 9.658906936645508, "learning_rate": 7.177624350905155e-06, "loss": 2.0232, "step": 2480 }, { "epoch": 0.4231815091774303, "grad_norm": 5.400667667388916, "learning_rate": 7.153559522573959e-06, "loss": 1.9549, "step": 2490 }, { "epoch": 0.424881033310673, "grad_norm": 6.810690402984619, "learning_rate": 7.126748835263528e-06, "loss": 1.9216, "step": 2500 }, { "epoch": 0.424881033310673, "eval_cosine_accuracy@1": 0.706, "eval_cosine_accuracy@10": 0.912, "eval_cosine_accuracy@3": 0.8375, "eval_cosine_accuracy@5": 0.87, "eval_cosine_map@100": 0.782229618428024, "eval_cosine_mrr@10": 0.7788738095238087, "eval_cosine_ndcg@10": 0.811319534720964, "eval_cosine_precision@1": 0.706, "eval_cosine_precision@10": 0.09120000000000002, "eval_cosine_precision@3": 0.2791666666666666, "eval_cosine_precision@5": 0.174, "eval_cosine_recall@1": 0.706, "eval_cosine_recall@10": 0.912, "eval_cosine_recall@3": 0.8375, "eval_cosine_recall@5": 0.87, "eval_loss": 1.6223056316375732, "eval_runtime": 2.7895, "eval_samples_per_second": 270.299, "eval_sequential_score": 0.782229618428024, "eval_steps_per_second": 2.151, "eval_sts-dev_pearson_cosine": 0.7982554363739512, "eval_sts-dev_pearson_dot": 0.7952587919868024, "eval_sts-dev_pearson_euclidean": 0.7869234232815028, "eval_sts-dev_pearson_manhattan": 0.7872366700437463, "eval_sts-dev_pearson_max": 0.7982554363739512, "eval_sts-dev_spearman_cosine": 0.8022561650534226, "eval_sts-dev_spearman_dot": 0.7985926145853256, "eval_sts-dev_spearman_euclidean": 0.8008740888247353, "eval_sts-dev_spearman_manhattan": 0.801728465873034, "eval_sts-dev_spearman_max": 0.8022561650534226, "step": 2500 }, { "epoch": 0.4265805574439157, "grad_norm": 6.482539653778076, "learning_rate": 7.099863282277834e-06, "loss": 1.5548, "step": 2510 }, { "epoch": 0.4282800815771584, "grad_norm": 7.853833198547363, "learning_rate": 7.072903810040381e-06, "loss": 1.6629, "step": 2520 }, { "epoch": 0.4299796057104011, "grad_norm": 6.526813507080078, "learning_rate": 7.0458713675767665e-06, "loss": 1.9463, "step": 2530 }, { "epoch": 0.43167912984364376, "grad_norm": 9.038359642028809, "learning_rate": 7.018766906481288e-06, "loss": 2.16, "step": 2540 }, { "epoch": 0.43337865397688646, "grad_norm": 5.049062728881836, "learning_rate": 6.991591380883435e-06, "loss": 1.6674, "step": 2550 }, { "epoch": 0.43507817811012917, "grad_norm": 6.659489154815674, "learning_rate": 6.964345747414307e-06, "loss": 1.7055, "step": 2560 }, { "epoch": 0.43677770224337187, "grad_norm": 8.091437339782715, "learning_rate": 6.937030965172935e-06, "loss": 1.8062, "step": 2570 }, { "epoch": 0.43847722637661457, "grad_norm": 7.644534587860107, "learning_rate": 6.909647995692522e-06, "loss": 2.1972, "step": 2580 }, { "epoch": 0.4401767505098572, "grad_norm": 7.349899768829346, "learning_rate": 6.88219780290659e-06, "loss": 1.9069, "step": 2590 }, { "epoch": 0.4418762746430999, "grad_norm": 8.312362670898438, "learning_rate": 6.854681353115056e-06, "loss": 2.6629, "step": 2600 }, { "epoch": 0.4435757987763426, "grad_norm": 8.937472343444824, "learning_rate": 6.82709961495021e-06, "loss": 2.4573, "step": 2610 }, { "epoch": 0.4452753229095853, "grad_norm": 6.268832683563232, "learning_rate": 6.799453559342619e-06, "loss": 2.0894, "step": 2620 }, { "epoch": 0.446974847042828, "grad_norm": 9.547843933105469, "learning_rate": 6.771744159486947e-06, "loss": 1.5755, "step": 2630 }, { "epoch": 0.4486743711760707, "grad_norm": 8.299346923828125, "learning_rate": 6.7439723908077e-06, "loss": 2.3673, "step": 2640 }, { "epoch": 0.4503738953093134, "grad_norm": 6.921226978302002, "learning_rate": 6.716139230924891e-06, "loss": 1.8997, "step": 2650 }, { "epoch": 0.4520734194425561, "grad_norm": 6.874094486236572, "learning_rate": 6.6882456596196125e-06, "loss": 2.0955, "step": 2660 }, { "epoch": 0.4537729435757988, "grad_norm": 7.336355686187744, "learning_rate": 6.660292658799565e-06, "loss": 2.0553, "step": 2670 }, { "epoch": 0.4554724677090415, "grad_norm": 6.311689376831055, "learning_rate": 6.632281212464479e-06, "loss": 1.9258, "step": 2680 }, { "epoch": 0.4571719918422842, "grad_norm": 5.280473232269287, "learning_rate": 6.60421230667148e-06, "loss": 1.7009, "step": 2690 }, { "epoch": 0.45887151597552683, "grad_norm": 5.1102986335754395, "learning_rate": 6.5760869295003796e-06, "loss": 1.9853, "step": 2700 }, { "epoch": 0.46057104010876954, "grad_norm": 6.345133304595947, "learning_rate": 6.54790607101889e-06, "loss": 1.8553, "step": 2710 }, { "epoch": 0.46227056424201224, "grad_norm": 9.678619384765625, "learning_rate": 6.519670723247773e-06, "loss": 1.7589, "step": 2720 }, { "epoch": 0.46397008837525494, "grad_norm": 4.833879470825195, "learning_rate": 6.491381880125916e-06, "loss": 1.827, "step": 2730 }, { "epoch": 0.46566961250849764, "grad_norm": 6.266608238220215, "learning_rate": 6.463040537475354e-06, "loss": 1.7476, "step": 2740 }, { "epoch": 0.4673691366417403, "grad_norm": 8.750253677368164, "learning_rate": 6.434647692966199e-06, "loss": 2.1204, "step": 2750 }, { "epoch": 0.469068660774983, "grad_norm": 5.197884559631348, "learning_rate": 6.406204346081531e-06, "loss": 1.5477, "step": 2760 }, { "epoch": 0.4707681849082257, "grad_norm": 7.584977626800537, "learning_rate": 6.377711498082213e-06, "loss": 1.9078, "step": 2770 }, { "epoch": 0.4724677090414684, "grad_norm": 5.226884365081787, "learning_rate": 6.349170151971641e-06, "loss": 1.53, "step": 2780 }, { "epoch": 0.4741672331747111, "grad_norm": 9.499238967895508, "learning_rate": 6.320581312460439e-06, "loss": 1.7868, "step": 2790 }, { "epoch": 0.47586675730795375, "grad_norm": 6.44546365737915, "learning_rate": 6.291945985931093e-06, "loss": 1.7781, "step": 2800 }, { "epoch": 0.47756628144119645, "grad_norm": 5.152192115783691, "learning_rate": 6.263265180402517e-06, "loss": 2.0043, "step": 2810 }, { "epoch": 0.47926580557443915, "grad_norm": 7.129688262939453, "learning_rate": 6.234539905494576e-06, "loss": 1.9584, "step": 2820 }, { "epoch": 0.48096532970768185, "grad_norm": 6.289784908294678, "learning_rate": 6.2057711723925455e-06, "loss": 1.9584, "step": 2830 }, { "epoch": 0.48266485384092456, "grad_norm": 7.36097526550293, "learning_rate": 6.176959993811511e-06, "loss": 1.9952, "step": 2840 }, { "epoch": 0.48436437797416726, "grad_norm": 4.622279644012451, "learning_rate": 6.148107383960723e-06, "loss": 1.9764, "step": 2850 }, { "epoch": 0.4860639021074099, "grad_norm": 8.651782989501953, "learning_rate": 6.119214358507889e-06, "loss": 1.8804, "step": 2860 }, { "epoch": 0.4877634262406526, "grad_norm": 6.295717716217041, "learning_rate": 6.090281934543429e-06, "loss": 1.8634, "step": 2870 }, { "epoch": 0.4894629503738953, "grad_norm": 5.505663871765137, "learning_rate": 6.061311130544664e-06, "loss": 1.8853, "step": 2880 }, { "epoch": 0.491162474507138, "grad_norm": 10.88358211517334, "learning_rate": 6.032302966339965e-06, "loss": 2.2094, "step": 2890 }, { "epoch": 0.4928619986403807, "grad_norm": 6.576531410217285, "learning_rate": 6.003258463072859e-06, "loss": 1.5955, "step": 2900 }, { "epoch": 0.49456152277362336, "grad_norm": 5.74630880355835, "learning_rate": 5.974178643166075e-06, "loss": 1.8756, "step": 2910 }, { "epoch": 0.49626104690686607, "grad_norm": 8.021421432495117, "learning_rate": 5.945064530285556e-06, "loss": 1.9114, "step": 2920 }, { "epoch": 0.49796057104010877, "grad_norm": 4.470407009124756, "learning_rate": 5.915917149304424e-06, "loss": 2.2359, "step": 2930 }, { "epoch": 0.49966009517335147, "grad_norm": 8.264505386352539, "learning_rate": 5.8867375262669076e-06, "loss": 1.7543, "step": 2940 }, { "epoch": 0.5013596193065941, "grad_norm": 6.8826069831848145, "learning_rate": 5.857526688352211e-06, "loss": 1.7754, "step": 2950 }, { "epoch": 0.5030591434398368, "grad_norm": 7.148299694061279, "learning_rate": 5.828285663838365e-06, "loss": 2.1194, "step": 2960 }, { "epoch": 0.5047586675730795, "grad_norm": 6.472208499908447, "learning_rate": 5.7990154820660295e-06, "loss": 1.7127, "step": 2970 }, { "epoch": 0.5064581917063222, "grad_norm": 8.591309547424316, "learning_rate": 5.769717173402253e-06, "loss": 1.6715, "step": 2980 }, { "epoch": 0.5081577158395649, "grad_norm": 5.60316801071167, "learning_rate": 5.740391769204209e-06, "loss": 1.4632, "step": 2990 }, { "epoch": 0.5098572399728076, "grad_norm": 5.79714822769165, "learning_rate": 5.71104030178288e-06, "loss": 1.7171, "step": 3000 }, { "epoch": 0.5098572399728076, "eval_cosine_accuracy@1": 0.707, "eval_cosine_accuracy@10": 0.915, "eval_cosine_accuracy@3": 0.839, "eval_cosine_accuracy@5": 0.874, "eval_cosine_map@100": 0.7842834572194356, "eval_cosine_mrr@10": 0.7809134920634915, "eval_cosine_ndcg@10": 0.8136383995155022, "eval_cosine_precision@1": 0.707, "eval_cosine_precision@10": 0.0915, "eval_cosine_precision@3": 0.2796666666666666, "eval_cosine_precision@5": 0.1748, "eval_cosine_recall@1": 0.707, "eval_cosine_recall@10": 0.915, "eval_cosine_recall@3": 0.839, "eval_cosine_recall@5": 0.874, "eval_loss": 1.6132309436798096, "eval_runtime": 2.7954, "eval_samples_per_second": 269.727, "eval_sequential_score": 0.7842834572194356, "eval_steps_per_second": 2.146, "eval_sts-dev_pearson_cosine": 0.8056038655154354, "eval_sts-dev_pearson_dot": 0.8019913183334593, "eval_sts-dev_pearson_euclidean": 0.7922504417572196, "eval_sts-dev_pearson_manhattan": 0.7937333279050955, "eval_sts-dev_pearson_max": 0.8056038655154354, "eval_sts-dev_spearman_cosine": 0.8083570013976158, "eval_sts-dev_spearman_dot": 0.8047381961082496, "eval_sts-dev_spearman_euclidean": 0.8071350345841625, "eval_sts-dev_spearman_manhattan": 0.8069596988802814, "eval_sts-dev_spearman_max": 0.8083570013976158, "step": 3000 }, { "epoch": 0.5115567641060503, "grad_norm": 6.9394049644470215, "learning_rate": 5.6816638043667294e-06, "loss": 1.4526, "step": 3010 }, { "epoch": 0.513256288239293, "grad_norm": 10.348281860351562, "learning_rate": 5.652263311065325e-06, "loss": 1.8309, "step": 3020 }, { "epoch": 0.5149558123725357, "grad_norm": 8.404902458190918, "learning_rate": 5.6228398568329344e-06, "loss": 1.9336, "step": 3030 }, { "epoch": 0.5166553365057783, "grad_norm": 6.444024562835693, "learning_rate": 5.593394477432096e-06, "loss": 1.7704, "step": 3040 }, { "epoch": 0.518354860639021, "grad_norm": 9.629279136657715, "learning_rate": 5.563928209397151e-06, "loss": 1.943, "step": 3050 }, { "epoch": 0.5200543847722637, "grad_norm": 6.889398097991943, "learning_rate": 5.534442089997771e-06, "loss": 1.8689, "step": 3060 }, { "epoch": 0.5217539089055064, "grad_norm": 7.757649898529053, "learning_rate": 5.5049371572024235e-06, "loss": 1.8908, "step": 3070 }, { "epoch": 0.5234534330387491, "grad_norm": 6.982544422149658, "learning_rate": 5.475414449641852e-06, "loss": 1.7924, "step": 3080 }, { "epoch": 0.5251529571719918, "grad_norm": 7.109350204467773, "learning_rate": 5.445875006572502e-06, "loss": 1.7891, "step": 3090 }, { "epoch": 0.5268524813052345, "grad_norm": 7.507669448852539, "learning_rate": 5.416319867839943e-06, "loss": 2.0208, "step": 3100 }, { "epoch": 0.5285520054384772, "grad_norm": 4.527754783630371, "learning_rate": 5.386750073842259e-06, "loss": 1.5862, "step": 3110 }, { "epoch": 0.53025152957172, "grad_norm": 8.16139030456543, "learning_rate": 5.357166665493434e-06, "loss": 2.2195, "step": 3120 }, { "epoch": 0.5319510537049627, "grad_norm": 7.385548114776611, "learning_rate": 5.327570684186697e-06, "loss": 1.6103, "step": 3130 }, { "epoch": 0.5336505778382054, "grad_norm": 7.784885406494141, "learning_rate": 5.297963171757874e-06, "loss": 2.5839, "step": 3140 }, { "epoch": 0.535350101971448, "grad_norm": 7.986569404602051, "learning_rate": 5.268345170448704e-06, "loss": 2.4304, "step": 3150 }, { "epoch": 0.5370496261046906, "grad_norm": 7.29852294921875, "learning_rate": 5.23871772287016e-06, "loss": 1.9009, "step": 3160 }, { "epoch": 0.5387491502379333, "grad_norm": 8.884086608886719, "learning_rate": 5.209081871965741e-06, "loss": 2.2161, "step": 3170 }, { "epoch": 0.540448674371176, "grad_norm": 6.930916786193848, "learning_rate": 5.179438660974756e-06, "loss": 1.9054, "step": 3180 }, { "epoch": 0.5421481985044188, "grad_norm": 6.724717617034912, "learning_rate": 5.149789133395606e-06, "loss": 1.3603, "step": 3190 }, { "epoch": 0.5438477226376615, "grad_norm": 11.375778198242188, "learning_rate": 5.120134332949049e-06, "loss": 2.1651, "step": 3200 }, { "epoch": 0.5455472467709042, "grad_norm": 9.330193519592285, "learning_rate": 5.0904753035414545e-06, "loss": 2.0636, "step": 3210 }, { "epoch": 0.5472467709041469, "grad_norm": 7.2718000411987305, "learning_rate": 5.060813089228064e-06, "loss": 2.0036, "step": 3220 }, { "epoch": 0.5489462950373896, "grad_norm": 5.489546775817871, "learning_rate": 5.0311487341762285e-06, "loss": 1.7724, "step": 3230 }, { "epoch": 0.5506458191706323, "grad_norm": 7.590505123138428, "learning_rate": 5.001483282628664e-06, "loss": 1.2798, "step": 3240 }, { "epoch": 0.552345343303875, "grad_norm": 5.193188190460205, "learning_rate": 4.971817778866681e-06, "loss": 1.8975, "step": 3250 }, { "epoch": 0.5540448674371176, "grad_norm": 4.518863677978516, "learning_rate": 4.9421532671734265e-06, "loss": 1.8153, "step": 3260 }, { "epoch": 0.5557443915703603, "grad_norm": 7.619812488555908, "learning_rate": 4.912490791797129e-06, "loss": 1.6361, "step": 3270 }, { "epoch": 0.557443915703603, "grad_norm": 8.381867408752441, "learning_rate": 4.882831396914332e-06, "loss": 2.2208, "step": 3280 }, { "epoch": 0.5591434398368457, "grad_norm": 4.878629684448242, "learning_rate": 4.853176126593139e-06, "loss": 1.7356, "step": 3290 }, { "epoch": 0.5608429639700884, "grad_norm": 6.435664653778076, "learning_rate": 4.823526024756464e-06, "loss": 1.8563, "step": 3300 }, { "epoch": 0.5625424881033311, "grad_norm": 6.569293022155762, "learning_rate": 4.793882135145278e-06, "loss": 1.8743, "step": 3310 }, { "epoch": 0.5642420122365738, "grad_norm": 6.728095054626465, "learning_rate": 4.764245501281869e-06, "loss": 1.7494, "step": 3320 }, { "epoch": 0.5659415363698165, "grad_norm": 6.320137977600098, "learning_rate": 4.734617166433113e-06, "loss": 1.5363, "step": 3330 }, { "epoch": 0.5676410605030592, "grad_norm": 5.589328765869141, "learning_rate": 4.7049981735737385e-06, "loss": 2.0732, "step": 3340 }, { "epoch": 0.5693405846363019, "grad_norm": 6.616926193237305, "learning_rate": 4.67538956534962e-06, "loss": 1.7303, "step": 3350 }, { "epoch": 0.5710401087695445, "grad_norm": 4.177309036254883, "learning_rate": 4.645792384041078e-06, "loss": 1.532, "step": 3360 }, { "epoch": 0.5727396329027872, "grad_norm": 6.281280517578125, "learning_rate": 4.616207671526175e-06, "loss": 2.2504, "step": 3370 }, { "epoch": 0.5744391570360299, "grad_norm": 6.781736373901367, "learning_rate": 4.586636469244054e-06, "loss": 1.6377, "step": 3380 }, { "epoch": 0.5761386811692726, "grad_norm": 8.028448104858398, "learning_rate": 4.557079818158269e-06, "loss": 2.1332, "step": 3390 }, { "epoch": 0.5778382053025153, "grad_norm": 5.485851764678955, "learning_rate": 4.527538758720144e-06, "loss": 1.8658, "step": 3400 }, { "epoch": 0.579537729435758, "grad_norm": 5.404677867889404, "learning_rate": 4.498014330832148e-06, "loss": 1.8165, "step": 3410 }, { "epoch": 0.5812372535690007, "grad_norm": 5.938612461090088, "learning_rate": 4.468507573811285e-06, "loss": 1.7847, "step": 3420 }, { "epoch": 0.5829367777022434, "grad_norm": 6.639806270599365, "learning_rate": 4.439019526352511e-06, "loss": 1.747, "step": 3430 }, { "epoch": 0.5846363018354861, "grad_norm": 10.548473358154297, "learning_rate": 4.409551226492173e-06, "loss": 1.8863, "step": 3440 }, { "epoch": 0.5863358259687288, "grad_norm": 6.586738109588623, "learning_rate": 4.380103711571459e-06, "loss": 2.0465, "step": 3450 }, { "epoch": 0.5880353501019715, "grad_norm": 6.210246562957764, "learning_rate": 4.350678018199887e-06, "loss": 1.6285, "step": 3460 }, { "epoch": 0.5897348742352141, "grad_norm": 8.666604995727539, "learning_rate": 4.321275182218821e-06, "loss": 2.278, "step": 3470 }, { "epoch": 0.5914343983684568, "grad_norm": 5.970696926116943, "learning_rate": 4.291896238664991e-06, "loss": 1.7391, "step": 3480 }, { "epoch": 0.5931339225016995, "grad_norm": 6.197153091430664, "learning_rate": 4.262542221734077e-06, "loss": 1.8116, "step": 3490 }, { "epoch": 0.5948334466349422, "grad_norm": 5.297378063201904, "learning_rate": 4.233214164744286e-06, "loss": 1.5104, "step": 3500 }, { "epoch": 0.5948334466349422, "eval_cosine_accuracy@1": 0.7, "eval_cosine_accuracy@10": 0.9105, "eval_cosine_accuracy@3": 0.8315, "eval_cosine_accuracy@5": 0.871, "eval_cosine_map@100": 0.7789008444240532, "eval_cosine_mrr@10": 0.775367857142857, "eval_cosine_ndcg@10": 0.8083837978692977, "eval_cosine_precision@1": 0.7, "eval_cosine_precision@10": 0.09105, "eval_cosine_precision@3": 0.2771666666666666, "eval_cosine_precision@5": 0.17420000000000005, "eval_cosine_recall@1": 0.7, "eval_cosine_recall@10": 0.9105, "eval_cosine_recall@3": 0.8315, "eval_cosine_recall@5": 0.871, "eval_loss": 1.5698647499084473, "eval_runtime": 2.7632, "eval_samples_per_second": 272.875, "eval_sequential_score": 0.7789008444240532, "eval_steps_per_second": 2.171, "eval_sts-dev_pearson_cosine": 0.8017937499678631, "eval_sts-dev_pearson_dot": 0.7968971369444564, "eval_sts-dev_pearson_euclidean": 0.7889399312152248, "eval_sts-dev_pearson_manhattan": 0.7898347176162472, "eval_sts-dev_pearson_max": 0.8017937499678631, "eval_sts-dev_spearman_cosine": 0.8044304044501694, "eval_sts-dev_spearman_dot": 0.8013449184419915, "eval_sts-dev_spearman_euclidean": 0.8042950575910329, "eval_sts-dev_spearman_manhattan": 0.8042523771894302, "eval_sts-dev_spearman_max": 0.8044304044501694, "step": 3500 }, { "epoch": 0.5965329707681849, "grad_norm": 5.6484575271606445, "learning_rate": 4.2039131000999865e-06, "loss": 1.8306, "step": 3510 }, { "epoch": 0.5982324949014276, "grad_norm": 7.109194278717041, "learning_rate": 4.174640059255369e-06, "loss": 1.6575, "step": 3520 }, { "epoch": 0.5999320190346703, "grad_norm": 8.67902660369873, "learning_rate": 4.145396072678126e-06, "loss": 1.56, "step": 3530 }, { "epoch": 0.601631543167913, "grad_norm": 5.615638732910156, "learning_rate": 4.116182169813185e-06, "loss": 1.8674, "step": 3540 }, { "epoch": 0.6033310673011557, "grad_norm": 6.5344085693359375, "learning_rate": 4.086999379046472e-06, "loss": 1.4909, "step": 3550 }, { "epoch": 0.6050305914343984, "grad_norm": 7.410928726196289, "learning_rate": 4.057848727668701e-06, "loss": 1.6653, "step": 3560 }, { "epoch": 0.6067301155676411, "grad_norm": 5.162845134735107, "learning_rate": 4.028731241839224e-06, "loss": 2.2836, "step": 3570 }, { "epoch": 0.6084296397008837, "grad_norm": 9.384202003479004, "learning_rate": 3.999647946549898e-06, "loss": 2.1572, "step": 3580 }, { "epoch": 0.6101291638341264, "grad_norm": 9.560714721679688, "learning_rate": 3.970599865589006e-06, "loss": 1.6969, "step": 3590 }, { "epoch": 0.6118286879673691, "grad_norm": 8.857442855834961, "learning_rate": 3.94158802150522e-06, "loss": 2.0679, "step": 3600 }, { "epoch": 0.6135282121006118, "grad_norm": 4.5523200035095215, "learning_rate": 3.912613435571603e-06, "loss": 1.8555, "step": 3610 }, { "epoch": 0.6152277362338545, "grad_norm": 4.791132926940918, "learning_rate": 3.8836771277496575e-06, "loss": 1.8506, "step": 3620 }, { "epoch": 0.6169272603670972, "grad_norm": 5.0394673347473145, "learning_rate": 3.854780116653429e-06, "loss": 1.4441, "step": 3630 }, { "epoch": 0.6186267845003399, "grad_norm": 8.9247465133667, "learning_rate": 3.8259234195136344e-06, "loss": 1.6772, "step": 3640 }, { "epoch": 0.6203263086335826, "grad_norm": 7.963625907897949, "learning_rate": 3.7971080521418636e-06, "loss": 1.6228, "step": 3650 }, { "epoch": 0.6220258327668253, "grad_norm": 5.572691917419434, "learning_rate": 3.7683350288948204e-06, "loss": 1.7809, "step": 3660 }, { "epoch": 0.623725356900068, "grad_norm": 4.029000282287598, "learning_rate": 3.7396053626386115e-06, "loss": 1.5777, "step": 3670 }, { "epoch": 0.6254248810333106, "grad_norm": 5.965753555297852, "learning_rate": 3.7109200647130983e-06, "loss": 2.0558, "step": 3680 }, { "epoch": 0.6271244051665533, "grad_norm": 5.478359222412109, "learning_rate": 3.6822801448962835e-06, "loss": 1.9044, "step": 3690 }, { "epoch": 0.628823929299796, "grad_norm": 6.156702518463135, "learning_rate": 3.6536866113687754e-06, "loss": 1.9202, "step": 3700 }, { "epoch": 0.6305234534330387, "grad_norm": 5.119917392730713, "learning_rate": 3.6251404706783005e-06, "loss": 1.4821, "step": 3710 }, { "epoch": 0.6322229775662814, "grad_norm": 5.908693790435791, "learning_rate": 3.5966427277042593e-06, "loss": 1.5492, "step": 3720 }, { "epoch": 0.6339225016995241, "grad_norm": 6.6029181480407715, "learning_rate": 3.568194385622359e-06, "loss": 1.7337, "step": 3730 }, { "epoch": 0.6356220258327668, "grad_norm": 6.767298221588135, "learning_rate": 3.5397964458693045e-06, "loss": 2.0108, "step": 3740 }, { "epoch": 0.6373215499660095, "grad_norm": 7.079708576202393, "learning_rate": 3.5114499081075386e-06, "loss": 1.7365, "step": 3750 }, { "epoch": 0.6390210740992522, "grad_norm": 5.657531261444092, "learning_rate": 3.4831557701900507e-06, "loss": 1.5291, "step": 3760 }, { "epoch": 0.6407205982324949, "grad_norm": 9.403407096862793, "learning_rate": 3.4549150281252635e-06, "loss": 1.8147, "step": 3770 }, { "epoch": 0.6424201223657376, "grad_norm": 4.905513763427734, "learning_rate": 3.4267286760419527e-06, "loss": 1.6704, "step": 3780 }, { "epoch": 0.6441196464989802, "grad_norm": 5.188987731933594, "learning_rate": 3.398597706154268e-06, "loss": 1.727, "step": 3790 }, { "epoch": 0.6458191706322229, "grad_norm": 9.689862251281738, "learning_rate": 3.3705231087267955e-06, "loss": 1.741, "step": 3800 }, { "epoch": 0.6475186947654656, "grad_norm": 5.145129203796387, "learning_rate": 3.3425058720396986e-06, "loss": 1.6929, "step": 3810 }, { "epoch": 0.6492182188987083, "grad_norm": 5.635613918304443, "learning_rate": 3.314546982353941e-06, "loss": 1.7855, "step": 3820 }, { "epoch": 0.650917743031951, "grad_norm": 5.333996295928955, "learning_rate": 3.2866474238765522e-06, "loss": 1.4379, "step": 3830 }, { "epoch": 0.6526172671651937, "grad_norm": 7.929337501525879, "learning_rate": 3.258808178725984e-06, "loss": 1.8255, "step": 3840 }, { "epoch": 0.6543167912984365, "grad_norm": 7.606388092041016, "learning_rate": 3.2310302268975545e-06, "loss": 1.8072, "step": 3850 }, { "epoch": 0.6560163154316792, "grad_norm": 5.5722126960754395, "learning_rate": 3.2033145462289284e-06, "loss": 1.4806, "step": 3860 }, { "epoch": 0.6577158395649219, "grad_norm": 4.687691688537598, "learning_rate": 3.175662112365707e-06, "loss": 1.7177, "step": 3870 }, { "epoch": 0.6594153636981646, "grad_norm": 5.950764179229736, "learning_rate": 3.1480738987270854e-06, "loss": 1.3493, "step": 3880 }, { "epoch": 0.6611148878314073, "grad_norm": 6.248993396759033, "learning_rate": 3.120550876471576e-06, "loss": 1.7455, "step": 3890 }, { "epoch": 0.6628144119646499, "grad_norm": 6.485055446624756, "learning_rate": 3.0930940144628385e-06, "loss": 1.5822, "step": 3900 }, { "epoch": 0.6645139360978926, "grad_norm": 6.7886881828308105, "learning_rate": 3.0657042792355528e-06, "loss": 2.4897, "step": 3910 }, { "epoch": 0.6662134602311353, "grad_norm": 4.51090669631958, "learning_rate": 3.0383826349614115e-06, "loss": 1.8648, "step": 3920 }, { "epoch": 0.667912984364378, "grad_norm": 7.129496097564697, "learning_rate": 3.0111300434151746e-06, "loss": 1.8095, "step": 3930 }, { "epoch": 0.6696125084976207, "grad_norm": 8.079377174377441, "learning_rate": 2.9839474639408108e-06, "loss": 2.0793, "step": 3940 }, { "epoch": 0.6713120326308634, "grad_norm": 6.9486284255981445, "learning_rate": 2.956835853417726e-06, "loss": 1.9273, "step": 3950 }, { "epoch": 0.6730115567641061, "grad_norm": 6.231419563293457, "learning_rate": 2.9297961662270893e-06, "loss": 1.6705, "step": 3960 }, { "epoch": 0.6747110808973488, "grad_norm": 9.692667007446289, "learning_rate": 2.902829354218219e-06, "loss": 1.8497, "step": 3970 }, { "epoch": 0.6764106050305915, "grad_norm": 6.941633701324463, "learning_rate": 2.875936366675096e-06, "loss": 2.1609, "step": 3980 }, { "epoch": 0.6781101291638342, "grad_norm": 4.512592315673828, "learning_rate": 2.8491181502829335e-06, "loss": 1.719, "step": 3990 }, { "epoch": 0.6798096532970768, "grad_norm": 13.518653869628906, "learning_rate": 2.822375649094846e-06, "loss": 1.7953, "step": 4000 }, { "epoch": 0.6798096532970768, "eval_cosine_accuracy@1": 0.7145, "eval_cosine_accuracy@10": 0.914, "eval_cosine_accuracy@3": 0.839, "eval_cosine_accuracy@5": 0.8745, "eval_cosine_map@100": 0.7881479093038395, "eval_cosine_mrr@10": 0.7848144841269835, "eval_cosine_ndcg@10": 0.8163072349117797, "eval_cosine_precision@1": 0.7145, "eval_cosine_precision@10": 0.09140000000000001, "eval_cosine_precision@3": 0.2796666666666666, "eval_cosine_precision@5": 0.1749, "eval_cosine_recall@1": 0.7145, "eval_cosine_recall@10": 0.914, "eval_cosine_recall@3": 0.839, "eval_cosine_recall@5": 0.8745, "eval_loss": 1.5573265552520752, "eval_runtime": 2.7757, "eval_samples_per_second": 271.645, "eval_sequential_score": 0.7881479093038395, "eval_steps_per_second": 2.162, "eval_sts-dev_pearson_cosine": 0.8053811920422538, "eval_sts-dev_pearson_dot": 0.8004660633078914, "eval_sts-dev_pearson_euclidean": 0.7928635065778744, "eval_sts-dev_pearson_manhattan": 0.7941416034985971, "eval_sts-dev_pearson_max": 0.8053811920422538, "eval_sts-dev_spearman_cosine": 0.8086109320148909, "eval_sts-dev_spearman_dot": 0.804234875942229, "eval_sts-dev_spearman_euclidean": 0.8084185226972233, "eval_sts-dev_spearman_manhattan": 0.8088695507249705, "eval_sts-dev_spearman_max": 0.8088695507249705, "step": 4000 } ], "logging_steps": 10, "max_steps": 5884, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }