|
{ |
|
"best_metric": 0.8287429760185343, |
|
"best_model_checkpoint": "result/my-sup-simcse-roberta-base_filtered_final_augx_0517_275578", |
|
"epoch": 3.0, |
|
"global_step": 1617, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_avg_sts": 0.7923727821060924, |
|
"eval_sickr_spearman": 0.7589401062590382, |
|
"eval_stsb_spearman": 0.8258054579531467, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_avg_sts": 0.788369556206714, |
|
"eval_sickr_spearman": 0.7429059316970545, |
|
"eval_stsb_spearman": 0.8338331807163735, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_avg_sts": 0.8104646483652821, |
|
"eval_sickr_spearman": 0.7750206308339096, |
|
"eval_stsb_spearman": 0.8459086658966546, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_avg_sts": 0.8156440464261943, |
|
"eval_sickr_spearman": 0.7790718621158088, |
|
"eval_stsb_spearman": 0.8522162307365796, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_avg_sts": 0.8158874725367419, |
|
"eval_sickr_spearman": 0.7767967729339896, |
|
"eval_stsb_spearman": 0.8549781721394941, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_avg_sts": 0.8226456308827159, |
|
"eval_sickr_spearman": 0.7900410609689407, |
|
"eval_stsb_spearman": 0.8552502007964913, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_avg_sts": 0.8233733407465815, |
|
"eval_sickr_spearman": 0.7881731314340124, |
|
"eval_stsb_spearman": 0.8585735500591508, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_avg_sts": 0.8246758200303745, |
|
"eval_sickr_spearman": 0.7910648919271954, |
|
"eval_stsb_spearman": 0.8582867481335537, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_avg_sts": 0.8127837390967988, |
|
"eval_sickr_spearman": 0.7683723098661505, |
|
"eval_stsb_spearman": 0.8571951683274471, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_avg_sts": 0.8211001358940954, |
|
"eval_sickr_spearman": 0.7857309900813916, |
|
"eval_stsb_spearman": 0.8564692817067991, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_avg_sts": 0.8231415754884768, |
|
"eval_sickr_spearman": 0.7870932481803794, |
|
"eval_stsb_spearman": 0.859189902796574, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_avg_sts": 0.8281698148839484, |
|
"eval_sickr_spearman": 0.7933201442641206, |
|
"eval_stsb_spearman": 0.8630194855037762, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_avg_sts": 0.8261624892170372, |
|
"eval_sickr_spearman": 0.7934045829404487, |
|
"eval_stsb_spearman": 0.8589203954936255, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_avg_sts": 0.8223223884475288, |
|
"eval_sickr_spearman": 0.7860610117793729, |
|
"eval_stsb_spearman": 0.8585837651156847, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_avg_sts": 0.8239117966986287, |
|
"eval_sickr_spearman": 0.7878053572903008, |
|
"eval_stsb_spearman": 0.8600182361069565, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_avg_sts": 0.8189825828946973, |
|
"eval_sickr_spearman": 0.7824180448596978, |
|
"eval_stsb_spearman": 0.8555471209296966, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_avg_sts": 0.8284554395260346, |
|
"eval_sickr_spearman": 0.7938278330063215, |
|
"eval_stsb_spearman": 0.8630830460457476, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_avg_sts": 0.8287429760185343, |
|
"eval_sickr_spearman": 0.7950705897249312, |
|
"eval_stsb_spearman": 0.8624153623121376, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_avg_sts": 0.8175086199936331, |
|
"eval_sickr_spearman": 0.7783943353989228, |
|
"eval_stsb_spearman": 0.8566229045883434, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.453927025355597e-05, |
|
"loss": 0.4533, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_avg_sts": 0.8230793846551405, |
|
"eval_sickr_spearman": 0.791305047434386, |
|
"eval_stsb_spearman": 0.8548537218758949, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_avg_sts": 0.813507998872834, |
|
"eval_sickr_spearman": 0.7710531177618173, |
|
"eval_stsb_spearman": 0.8559628799838506, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_avg_sts": 0.8152806946649198, |
|
"eval_sickr_spearman": 0.7748347024402428, |
|
"eval_stsb_spearman": 0.8557266868895969, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_avg_sts": 0.8193593042077025, |
|
"eval_sickr_spearman": 0.7769575330305031, |
|
"eval_stsb_spearman": 0.8617610753849019, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_avg_sts": 0.8122703001838674, |
|
"eval_sickr_spearman": 0.7715038416177126, |
|
"eval_stsb_spearman": 0.8530367587500223, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_avg_sts": 0.8236420464403356, |
|
"eval_sickr_spearman": 0.7849814167123483, |
|
"eval_stsb_spearman": 0.8623026761683228, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_avg_sts": 0.8133106230456628, |
|
"eval_sickr_spearman": 0.770097202780996, |
|
"eval_stsb_spearman": 0.8565240433103297, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_avg_sts": 0.8207134638537716, |
|
"eval_sickr_spearman": 0.7790919391162099, |
|
"eval_stsb_spearman": 0.8623349885913333, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_avg_sts": 0.8130353473025216, |
|
"eval_sickr_spearman": 0.7694488309426829, |
|
"eval_stsb_spearman": 0.8566218636623602, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_avg_sts": 0.8135852616644544, |
|
"eval_sickr_spearman": 0.7713928417422891, |
|
"eval_stsb_spearman": 0.8557776815866198, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_avg_sts": 0.8182033488573868, |
|
"eval_sickr_spearman": 0.7772465361678562, |
|
"eval_stsb_spearman": 0.8591601615469172, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_avg_sts": 0.8248685458177708, |
|
"eval_sickr_spearman": 0.7874361422135461, |
|
"eval_stsb_spearman": 0.8623009494219955, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_avg_sts": 0.8161782720571904, |
|
"eval_sickr_spearman": 0.7755229400927496, |
|
"eval_stsb_spearman": 0.8568336040216311, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_avg_sts": 0.8207483887962784, |
|
"eval_sickr_spearman": 0.7822897537877566, |
|
"eval_stsb_spearman": 0.8592070238048003, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_avg_sts": 0.8239255438283619, |
|
"eval_sickr_spearman": 0.7867861853488854, |
|
"eval_stsb_spearman": 0.8610649023078384, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_avg_sts": 0.8190144607374632, |
|
"eval_sickr_spearman": 0.7796808964820441, |
|
"eval_stsb_spearman": 0.8583480249928824, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_avg_sts": 0.8211177033463906, |
|
"eval_sickr_spearman": 0.7840137821427762, |
|
"eval_stsb_spearman": 0.8582216245500051, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_avg_sts": 0.8201580926083631, |
|
"eval_sickr_spearman": 0.7816100656713059, |
|
"eval_stsb_spearman": 0.8587061195454205, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_avg_sts": 0.8140545936227954, |
|
"eval_sickr_spearman": 0.7693706363095416, |
|
"eval_stsb_spearman": 0.8587385509360493, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_avg_sts": 0.8205478478328316, |
|
"eval_sickr_spearman": 0.778550484509698, |
|
"eval_stsb_spearman": 0.8625452111559652, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9078540507111937e-05, |
|
"loss": 0.2352, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_avg_sts": 0.8194978907732944, |
|
"eval_sickr_spearman": 0.7797764783739058, |
|
"eval_stsb_spearman": 0.859219303172683, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_avg_sts": 0.8179603788237452, |
|
"eval_sickr_spearman": 0.7792966476705391, |
|
"eval_stsb_spearman": 0.8566241099769513, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_avg_sts": 0.814634655693672, |
|
"eval_sickr_spearman": 0.7726660501792106, |
|
"eval_stsb_spearman": 0.8566032612081333, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_avg_sts": 0.8229387572431492, |
|
"eval_sickr_spearman": 0.786164518802972, |
|
"eval_stsb_spearman": 0.8597129956833264, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_avg_sts": 0.8170446704890681, |
|
"eval_sickr_spearman": 0.7740013628302915, |
|
"eval_stsb_spearman": 0.8600879781478449, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_avg_sts": 0.823749807845302, |
|
"eval_sickr_spearman": 0.7840726202420378, |
|
"eval_stsb_spearman": 0.8634269954485662, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_avg_sts": 0.824679097934147, |
|
"eval_sickr_spearman": 0.786571486325457, |
|
"eval_stsb_spearman": 0.8627867095428369, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_avg_sts": 0.8215752053838693, |
|
"eval_sickr_spearman": 0.7836207916708096, |
|
"eval_stsb_spearman": 0.859529619096929, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_avg_sts": 0.8271625177527122, |
|
"eval_sickr_spearman": 0.790379536140775, |
|
"eval_stsb_spearman": 0.8639454993646494, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_avg_sts": 0.8175642564462906, |
|
"eval_sickr_spearman": 0.7762766441365164, |
|
"eval_stsb_spearman": 0.8588518687560649, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_avg_sts": 0.8189636390886708, |
|
"eval_sickr_spearman": 0.7794043814310648, |
|
"eval_stsb_spearman": 0.8585228967462767, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_avg_sts": 0.8235331481562411, |
|
"eval_sickr_spearman": 0.7853204202262986, |
|
"eval_stsb_spearman": 0.8617458760861836, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_avg_sts": 0.8212932121455792, |
|
"eval_sickr_spearman": 0.7838025893897528, |
|
"eval_stsb_spearman": 0.8587838349014056, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_avg_sts": 0.8200184185533622, |
|
"eval_sickr_spearman": 0.7804020834701373, |
|
"eval_stsb_spearman": 0.8596347536365871, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_avg_sts": 0.8184879956680027, |
|
"eval_sickr_spearman": 0.7775680563608829, |
|
"eval_stsb_spearman": 0.8594079349751224, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_avg_sts": 0.8240646293698506, |
|
"eval_sickr_spearman": 0.7871783112610262, |
|
"eval_stsb_spearman": 0.8609509474786748, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_avg_sts": 0.8232889615255443, |
|
"eval_sickr_spearman": 0.7868230252036885, |
|
"eval_stsb_spearman": 0.8597548978474002, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_avg_sts": 0.8214126106703122, |
|
"eval_sickr_spearman": 0.7826799584558398, |
|
"eval_stsb_spearman": 0.8601452628847847, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_avg_sts": 0.8256026599230287, |
|
"eval_sickr_spearman": 0.7879515639630783, |
|
"eval_stsb_spearman": 0.8632537558829791, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_avg_sts": 0.8227290144728177, |
|
"eval_sickr_spearman": 0.7835550370929407, |
|
"eval_stsb_spearman": 0.8619029918526946, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6178107606679037e-06, |
|
"loss": 0.2129, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_avg_sts": 0.8228934679819335, |
|
"eval_sickr_spearman": 0.7842996632585357, |
|
"eval_stsb_spearman": 0.8614872727053314, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_avg_sts": 0.8208607916598496, |
|
"eval_sickr_spearman": 0.7816013240108441, |
|
"eval_stsb_spearman": 0.860120259308855, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_avg_sts": 0.8206687609569976, |
|
"eval_sickr_spearman": 0.7812808124709476, |
|
"eval_stsb_spearman": 0.8600567094430476, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_avg_sts": 0.8208944641677367, |
|
"eval_sickr_spearman": 0.7819390787161569, |
|
"eval_stsb_spearman": 0.8598498496193164, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_avg_sts": 0.8211952992867365, |
|
"eval_sickr_spearman": 0.7822705413471812, |
|
"eval_stsb_spearman": 0.8601200572262918, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1617, |
|
"train_runtime": 2320.0614, |
|
"train_samples_per_second": 0.697 |
|
} |
|
], |
|
"max_steps": 1617, |
|
"num_train_epochs": 3, |
|
"total_flos": 156090475282942080, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|