|
{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1227343082427979, "incorrect_loss_raw": 1.546127160390218, "correct_loss_per_char": 0.5613671541213989, "incorrect_loss_per_char": 0.773063580195109, "correct_loss_per_token": 1.1227343082427979, "incorrect_loss_per_token": 1.546127160390218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1227343082427979, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": true, "logits_per_token": -1.1227343082427979, "logits_per_char": -0.5613671541213989, "num_chars": 2}, {"sum_logits": -1.2555543184280396, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.2555543184280396, "logits_per_char": -0.6277771592140198, "num_chars": 2}, {"sum_logits": -1.5528144836425781, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.5528144836425781, "logits_per_char": -0.7764072418212891, "num_chars": 2}, {"sum_logits": -1.8300126791000366, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.8300126791000366, "logits_per_char": -0.9150063395500183, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8974121809005737, "incorrect_loss_raw": 1.2970905303955078, "correct_loss_per_char": 0.9487060904502869, "incorrect_loss_per_char": 0.6485452651977539, "correct_loss_per_token": 1.8974121809005737, "incorrect_loss_per_token": 1.2970905303955078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1083078384399414, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.1083078384399414, "logits_per_char": -0.5541539192199707, "num_chars": 2}, {"sum_logits": -1.2411274909973145, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.2411274909973145, "logits_per_char": -0.6205637454986572, "num_chars": 2}, {"sum_logits": -1.5418362617492676, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5418362617492676, "logits_per_char": -0.7709181308746338, "num_chars": 2}, {"sum_logits": -1.8974121809005737, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.8974121809005737, "logits_per_char": -0.9487060904502869, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4689288139343262, "incorrect_loss_raw": 1.4321807622909546, "correct_loss_per_char": 0.7344644069671631, "incorrect_loss_per_char": 0.7160903811454773, "correct_loss_per_token": 1.4689288139343262, "incorrect_loss_per_token": 1.4321807622909546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2610199451446533, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.2610199451446533, "logits_per_char": -0.6305099725723267, "num_chars": 2}, {"sum_logits": -1.1185898780822754, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.1185898780822754, "logits_per_char": -0.5592949390411377, "num_chars": 2}, {"sum_logits": -1.4689288139343262, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.4689288139343262, "logits_per_char": -0.7344644069671631, "num_chars": 2}, {"sum_logits": -1.916932463645935, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.916932463645935, "logits_per_char": -0.9584662318229675, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.44450843334198, "incorrect_loss_raw": 1.4012714227040608, "correct_loss_per_char": 0.72225421667099, "incorrect_loss_per_char": 0.7006357113520304, "correct_loss_per_token": 1.44450843334198, "incorrect_loss_per_token": 1.4012714227040608, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3176287412643433, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.3176287412643433, "logits_per_char": -0.6588143706321716, "num_chars": 2}, {"sum_logits": -1.2681671380996704, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": true, "logits_per_token": -1.2681671380996704, "logits_per_char": -0.6340835690498352, "num_chars": 2}, {"sum_logits": -1.44450843334198, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.44450843334198, "logits_per_char": -0.72225421667099, "num_chars": 2}, {"sum_logits": -1.618018388748169, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.618018388748169, "logits_per_char": -0.8090091943740845, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0672802925109863, "incorrect_loss_raw": 1.3029509782791138, "correct_loss_per_char": 1.0336401462554932, "incorrect_loss_per_char": 0.6514754891395569, "correct_loss_per_token": 2.0672802925109863, "incorrect_loss_per_token": 1.3029509782791138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9692131280899048, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -0.9692131280899048, "logits_per_char": -0.4846065640449524, "num_chars": 2}, {"sum_logits": -1.206819772720337, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.206819772720337, "logits_per_char": -0.6034098863601685, "num_chars": 2}, {"sum_logits": -1.7328200340270996, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.7328200340270996, "logits_per_char": -0.8664100170135498, "num_chars": 2}, {"sum_logits": -2.0672802925109863, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -2.0672802925109863, "logits_per_char": -1.0336401462554932, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4350957870483398, "incorrect_loss_raw": 1.4127087593078613, "correct_loss_per_char": 0.7175478935241699, "incorrect_loss_per_char": 0.7063543796539307, "correct_loss_per_token": 1.4350957870483398, "incorrect_loss_per_token": 1.4127087593078613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.275491714477539, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.275491714477539, "logits_per_char": -0.6377458572387695, "num_chars": 2}, {"sum_logits": -1.251090168952942, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.251090168952942, "logits_per_char": -0.625545084476471, "num_chars": 2}, {"sum_logits": -1.4350957870483398, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.4350957870483398, "logits_per_char": -0.7175478935241699, "num_chars": 2}, {"sum_logits": -1.711544394493103, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.711544394493103, "logits_per_char": -0.8557721972465515, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1694105863571167, "incorrect_loss_raw": 1.5054983297983806, "correct_loss_per_char": 0.5847052931785583, "incorrect_loss_per_char": 0.7527491648991903, "correct_loss_per_token": 1.1694105863571167, "incorrect_loss_per_token": 1.5054983297983806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4001327753067017, "num_tokens": 1, "num_tokens_all": 652, "is_greedy": false, "logits_per_token": -1.4001327753067017, "logits_per_char": -0.7000663876533508, "num_chars": 2}, {"sum_logits": -1.1694105863571167, "num_tokens": 1, "num_tokens_all": 652, "is_greedy": true, "logits_per_token": -1.1694105863571167, "logits_per_char": -0.5847052931785583, "num_chars": 2}, {"sum_logits": -1.4168730974197388, "num_tokens": 1, "num_tokens_all": 652, "is_greedy": false, "logits_per_token": -1.4168730974197388, "logits_per_char": -0.7084365487098694, "num_chars": 2}, {"sum_logits": -1.6994891166687012, "num_tokens": 1, "num_tokens_all": 652, "is_greedy": false, "logits_per_token": -1.6994891166687012, "logits_per_char": -0.8497445583343506, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0816802978515625, "incorrect_loss_raw": 1.5435821612675984, "correct_loss_per_char": 0.5408401489257812, "incorrect_loss_per_char": 0.7717910806337992, "correct_loss_per_token": 1.0816802978515625, "incorrect_loss_per_token": 1.5435821612675984, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4433780908584595, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.4433780908584595, "logits_per_char": -0.7216890454292297, "num_chars": 2}, {"sum_logits": -1.0816802978515625, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": true, "logits_per_token": -1.0816802978515625, "logits_per_char": -0.5408401489257812, "num_chars": 2}, {"sum_logits": -1.4343342781066895, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.4343342781066895, "logits_per_char": -0.7171671390533447, "num_chars": 2}, {"sum_logits": -1.7530341148376465, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.7530341148376465, "logits_per_char": -0.8765170574188232, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8364535570144653, "incorrect_loss_raw": 1.2881282567977905, "correct_loss_per_char": 0.9182267785072327, "incorrect_loss_per_char": 0.6440641283988953, "correct_loss_per_token": 1.8364535570144653, "incorrect_loss_per_token": 1.2881282567977905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3386858701705933, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.3386858701705933, "logits_per_char": -0.6693429350852966, "num_chars": 2}, {"sum_logits": -1.2514517307281494, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": true, "logits_per_token": -1.2514517307281494, "logits_per_char": -0.6257258653640747, "num_chars": 2}, {"sum_logits": -1.274247169494629, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.274247169494629, "logits_per_char": -0.6371235847473145, "num_chars": 2}, {"sum_logits": -1.8364535570144653, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.8364535570144653, "logits_per_char": -0.9182267785072327, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8235934972763062, "incorrect_loss_raw": 1.3079261382420857, "correct_loss_per_char": 0.9117967486381531, "incorrect_loss_per_char": 0.6539630691210429, "correct_loss_per_token": 1.8235934972763062, "incorrect_loss_per_token": 1.3079261382420857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4569439888000488, "num_tokens": 1, "num_tokens_all": 708, "is_greedy": false, "logits_per_token": -1.4569439888000488, "logits_per_char": -0.7284719944000244, "num_chars": 2}, {"sum_logits": -1.057741403579712, "num_tokens": 1, "num_tokens_all": 708, "is_greedy": true, "logits_per_token": -1.057741403579712, "logits_per_char": -0.528870701789856, "num_chars": 2}, {"sum_logits": -1.4090930223464966, "num_tokens": 1, "num_tokens_all": 708, "is_greedy": false, "logits_per_token": -1.4090930223464966, "logits_per_char": -0.7045465111732483, "num_chars": 2}, {"sum_logits": -1.8235934972763062, "num_tokens": 1, "num_tokens_all": 708, "is_greedy": false, "logits_per_token": -1.8235934972763062, "logits_per_char": -0.9117967486381531, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.76084303855896, "incorrect_loss_raw": 1.307548999786377, "correct_loss_per_char": 0.88042151927948, "incorrect_loss_per_char": 0.6537744998931885, "correct_loss_per_token": 1.76084303855896, "incorrect_loss_per_token": 1.307548999786377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.412014365196228, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.412014365196228, "logits_per_char": -0.706007182598114, "num_chars": 2}, {"sum_logits": -1.1659657955169678, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": true, "logits_per_token": -1.1659657955169678, "logits_per_char": -0.5829828977584839, "num_chars": 2}, {"sum_logits": -1.344666838645935, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.344666838645935, "logits_per_char": -0.6723334193229675, "num_chars": 2}, {"sum_logits": -1.76084303855896, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.76084303855896, "logits_per_char": -0.88042151927948, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4553718566894531, "incorrect_loss_raw": 1.4180787801742554, "correct_loss_per_char": 0.7276859283447266, "incorrect_loss_per_char": 0.7090393900871277, "correct_loss_per_token": 1.4553718566894531, "incorrect_loss_per_token": 1.4180787801742554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2511203289031982, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.2511203289031982, "logits_per_char": -0.6255601644515991, "num_chars": 2}, {"sum_logits": -1.1965712308883667, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": true, "logits_per_token": -1.1965712308883667, "logits_per_char": -0.5982856154441833, "num_chars": 2}, {"sum_logits": -1.4553718566894531, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.4553718566894531, "logits_per_char": -0.7276859283447266, "num_chars": 2}, {"sum_logits": -1.8065447807312012, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.8065447807312012, "logits_per_char": -0.9032723903656006, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2941392660140991, "incorrect_loss_raw": 1.4804759820302327, "correct_loss_per_char": 0.6470696330070496, "incorrect_loss_per_char": 0.7402379910151163, "correct_loss_per_token": 1.2941392660140991, "incorrect_loss_per_token": 1.4804759820302327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2941392660140991, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.2941392660140991, "logits_per_char": -0.6470696330070496, "num_chars": 2}, {"sum_logits": -1.1798713207244873, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": true, "logits_per_token": -1.1798713207244873, "logits_per_char": -0.5899356603622437, "num_chars": 2}, {"sum_logits": -1.3996199369430542, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.3996199369430542, "logits_per_char": -0.6998099684715271, "num_chars": 2}, {"sum_logits": -1.8619366884231567, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.8619366884231567, "logits_per_char": -0.9309683442115784, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6578296422958374, "incorrect_loss_raw": 1.3301840623219807, "correct_loss_per_char": 0.8289148211479187, "incorrect_loss_per_char": 0.6650920311609904, "correct_loss_per_token": 1.6578296422958374, "incorrect_loss_per_token": 1.3301840623219807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3411190509796143, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.3411190509796143, "logits_per_char": -0.6705595254898071, "num_chars": 2}, {"sum_logits": -1.211282730102539, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.211282730102539, "logits_per_char": -0.6056413650512695, "num_chars": 2}, {"sum_logits": -1.438150405883789, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.438150405883789, "logits_per_char": -0.7190752029418945, "num_chars": 2}, {"sum_logits": -1.6578296422958374, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.6578296422958374, "logits_per_char": -0.8289148211479187, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.220359444618225, "incorrect_loss_raw": 1.4901373386383057, "correct_loss_per_char": 0.6101797223091125, "incorrect_loss_per_char": 0.7450686693191528, "correct_loss_per_token": 1.220359444618225, "incorrect_loss_per_token": 1.4901373386383057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3159325122833252, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.3159325122833252, "logits_per_char": -0.6579662561416626, "num_chars": 2}, {"sum_logits": -1.220359444618225, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.220359444618225, "logits_per_char": -0.6101797223091125, "num_chars": 2}, {"sum_logits": -1.5263609886169434, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5263609886169434, "logits_per_char": -0.7631804943084717, "num_chars": 2}, {"sum_logits": -1.6281185150146484, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.6281185150146484, "logits_per_char": -0.8140592575073242, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3286912441253662, "incorrect_loss_raw": 1.4744805097579956, "correct_loss_per_char": 0.6643456220626831, "incorrect_loss_per_char": 0.7372402548789978, "correct_loss_per_token": 1.3286912441253662, "incorrect_loss_per_token": 1.4744805097579956, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.407554030418396, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.407554030418396, "logits_per_char": -0.703777015209198, "num_chars": 2}, {"sum_logits": -1.1053972244262695, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": true, "logits_per_token": -1.1053972244262695, "logits_per_char": -0.5526986122131348, "num_chars": 2}, {"sum_logits": -1.3286912441253662, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.3286912441253662, "logits_per_char": -0.6643456220626831, "num_chars": 2}, {"sum_logits": -1.9104902744293213, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.9104902744293213, "logits_per_char": -0.9552451372146606, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5466687679290771, "incorrect_loss_raw": 1.387686292330424, "correct_loss_per_char": 0.7733343839645386, "incorrect_loss_per_char": 0.693843146165212, "correct_loss_per_token": 1.5466687679290771, "incorrect_loss_per_token": 1.387686292330424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2896440029144287, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.2896440029144287, "logits_per_char": -0.6448220014572144, "num_chars": 2}, {"sum_logits": -1.123928189277649, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.123928189277649, "logits_per_char": -0.5619640946388245, "num_chars": 2}, {"sum_logits": -1.5466687679290771, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5466687679290771, "logits_per_char": -0.7733343839645386, "num_chars": 2}, {"sum_logits": -1.7494866847991943, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.7494866847991943, "logits_per_char": -0.8747433423995972, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.439159870147705, "incorrect_loss_raw": 1.4386768341064453, "correct_loss_per_char": 0.7195799350738525, "incorrect_loss_per_char": 0.7193384170532227, "correct_loss_per_token": 1.439159870147705, "incorrect_loss_per_token": 1.4386768341064453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.439159870147705, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.439159870147705, "logits_per_char": -0.7195799350738525, "num_chars": 2}, {"sum_logits": -1.0860739946365356, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": true, "logits_per_token": -1.0860739946365356, "logits_per_char": -0.5430369973182678, "num_chars": 2}, {"sum_logits": -1.4435012340545654, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.4435012340545654, "logits_per_char": -0.7217506170272827, "num_chars": 2}, {"sum_logits": -1.7864552736282349, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.7864552736282349, "logits_per_char": -0.8932276368141174, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9069764614105225, "incorrect_loss_raw": 1.3124589522679646, "correct_loss_per_char": 0.9534882307052612, "incorrect_loss_per_char": 0.6562294761339823, "correct_loss_per_token": 1.9069764614105225, "incorrect_loss_per_token": 1.3124589522679646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0373581647872925, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.0373581647872925, "logits_per_char": -0.5186790823936462, "num_chars": 2}, {"sum_logits": -1.232926607131958, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.232926607131958, "logits_per_char": -0.616463303565979, "num_chars": 2}, {"sum_logits": -1.6670920848846436, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.6670920848846436, "logits_per_char": -0.8335460424423218, "num_chars": 2}, {"sum_logits": -1.9069764614105225, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.9069764614105225, "logits_per_char": -0.9534882307052612, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.539278507232666, "incorrect_loss_raw": 1.3881803750991821, "correct_loss_per_char": 0.769639253616333, "incorrect_loss_per_char": 0.6940901875495911, "correct_loss_per_token": 1.539278507232666, "incorrect_loss_per_token": 1.3881803750991821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.267379641532898, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.267379641532898, "logits_per_char": -0.633689820766449, "num_chars": 2}, {"sum_logits": -1.2007691860198975, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -1.2007691860198975, "logits_per_char": -0.6003845930099487, "num_chars": 2}, {"sum_logits": -1.539278507232666, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.539278507232666, "logits_per_char": -0.769639253616333, "num_chars": 2}, {"sum_logits": -1.696392297744751, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.696392297744751, "logits_per_char": -0.8481961488723755, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9747966527938843, "incorrect_loss_raw": 1.286645531654358, "correct_loss_per_char": 0.9873983263969421, "incorrect_loss_per_char": 0.643322765827179, "correct_loss_per_token": 1.9747966527938843, "incorrect_loss_per_token": 1.286645531654358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2649645805358887, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.2649645805358887, "logits_per_char": -0.6324822902679443, "num_chars": 2}, {"sum_logits": -1.033324956893921, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.033324956893921, "logits_per_char": -0.5166624784469604, "num_chars": 2}, {"sum_logits": -1.5616470575332642, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.5616470575332642, "logits_per_char": -0.7808235287666321, "num_chars": 2}, {"sum_logits": -1.9747966527938843, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.9747966527938843, "logits_per_char": -0.9873983263969421, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8292655944824219, "incorrect_loss_raw": 1.2972280979156494, "correct_loss_per_char": 0.9146327972412109, "incorrect_loss_per_char": 0.6486140489578247, "correct_loss_per_token": 1.8292655944824219, "incorrect_loss_per_token": 1.2972280979156494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2821884155273438, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.2821884155273438, "logits_per_char": -0.6410942077636719, "num_chars": 2}, {"sum_logits": -1.1994174718856812, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": true, "logits_per_token": -1.1994174718856812, "logits_per_char": -0.5997087359428406, "num_chars": 2}, {"sum_logits": -1.4100784063339233, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.4100784063339233, "logits_per_char": -0.7050392031669617, "num_chars": 2}, {"sum_logits": -1.8292655944824219, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.8292655944824219, "logits_per_char": -0.9146327972412109, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.856727123260498, "incorrect_loss_raw": 1.3211716413497925, "correct_loss_per_char": 0.928363561630249, "incorrect_loss_per_char": 0.6605858206748962, "correct_loss_per_token": 1.856727123260498, "incorrect_loss_per_token": 1.3211716413497925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2959434986114502, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.2959434986114502, "logits_per_char": -0.6479717493057251, "num_chars": 2}, {"sum_logits": -1.0624936819076538, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": true, "logits_per_token": -1.0624936819076538, "logits_per_char": -0.5312468409538269, "num_chars": 2}, {"sum_logits": -1.6050777435302734, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.6050777435302734, "logits_per_char": -0.8025388717651367, "num_chars": 2}, {"sum_logits": -1.856727123260498, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.856727123260498, "logits_per_char": -0.928363561630249, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1189649105072021, "incorrect_loss_raw": 1.5323377052942913, "correct_loss_per_char": 0.5594824552536011, "incorrect_loss_per_char": 0.7661688526471456, "correct_loss_per_token": 1.1189649105072021, "incorrect_loss_per_token": 1.5323377052942913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.400464653968811, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.400464653968811, "logits_per_char": -0.7002323269844055, "num_chars": 2}, {"sum_logits": -1.1189649105072021, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.1189649105072021, "logits_per_char": -0.5594824552536011, "num_chars": 2}, {"sum_logits": -1.3854985237121582, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.3854985237121582, "logits_per_char": -0.6927492618560791, "num_chars": 2}, {"sum_logits": -1.8110499382019043, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.8110499382019043, "logits_per_char": -0.9055249691009521, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.353379726409912, "incorrect_loss_raw": 1.480424960454305, "correct_loss_per_char": 0.676689863204956, "incorrect_loss_per_char": 0.7402124802271525, "correct_loss_per_token": 1.353379726409912, "incorrect_loss_per_token": 1.480424960454305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.175142526626587, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.175142526626587, "logits_per_char": -0.5875712633132935, "num_chars": 2}, {"sum_logits": -1.3100886344909668, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.3100886344909668, "logits_per_char": -0.6550443172454834, "num_chars": 2}, {"sum_logits": -1.353379726409912, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.353379726409912, "logits_per_char": -0.676689863204956, "num_chars": 2}, {"sum_logits": -1.9560437202453613, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.9560437202453613, "logits_per_char": -0.9780218601226807, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7951328754425049, "incorrect_loss_raw": 1.326493223508199, "correct_loss_per_char": 0.8975664377212524, "incorrect_loss_per_char": 0.6632466117540995, "correct_loss_per_token": 1.7951328754425049, "incorrect_loss_per_token": 1.326493223508199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5597918033599854, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.5597918033599854, "logits_per_char": -0.7798959016799927, "num_chars": 2}, {"sum_logits": -1.0455321073532104, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.0455321073532104, "logits_per_char": -0.5227660536766052, "num_chars": 2}, {"sum_logits": -1.3741557598114014, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.3741557598114014, "logits_per_char": -0.6870778799057007, "num_chars": 2}, {"sum_logits": -1.7951328754425049, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.7951328754425049, "logits_per_char": -0.8975664377212524, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.512484073638916, "incorrect_loss_raw": 1.4179540475209553, "correct_loss_per_char": 0.756242036819458, "incorrect_loss_per_char": 0.7089770237604777, "correct_loss_per_token": 1.512484073638916, "incorrect_loss_per_token": 1.4179540475209553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1924563646316528, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.1924563646316528, "logits_per_char": -0.5962281823158264, "num_chars": 2}, {"sum_logits": -1.1555867195129395, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.1555867195129395, "logits_per_char": -0.5777933597564697, "num_chars": 2}, {"sum_logits": -1.512484073638916, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.512484073638916, "logits_per_char": -0.756242036819458, "num_chars": 2}, {"sum_logits": -1.905819058418274, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.905819058418274, "logits_per_char": -0.952909529209137, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8496214151382446, "incorrect_loss_raw": 1.3027679125467937, "correct_loss_per_char": 0.9248107075691223, "incorrect_loss_per_char": 0.6513839562733968, "correct_loss_per_token": 1.8496214151382446, "incorrect_loss_per_token": 1.3027679125467937, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1598480939865112, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.1598480939865112, "logits_per_char": -0.5799240469932556, "num_chars": 2}, {"sum_logits": -1.273748755455017, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.273748755455017, "logits_per_char": -0.6368743777275085, "num_chars": 2}, {"sum_logits": -1.4747068881988525, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.4747068881988525, "logits_per_char": -0.7373534440994263, "num_chars": 2}, {"sum_logits": -1.8496214151382446, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.8496214151382446, "logits_per_char": -0.9248107075691223, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1163249015808105, "incorrect_loss_raw": 1.5283534129460652, "correct_loss_per_char": 0.5581624507904053, "incorrect_loss_per_char": 0.7641767064730326, "correct_loss_per_token": 1.1163249015808105, "incorrect_loss_per_token": 1.5283534129460652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3372243642807007, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.3372243642807007, "logits_per_char": -0.6686121821403503, "num_chars": 2}, {"sum_logits": -1.1163249015808105, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": true, "logits_per_token": -1.1163249015808105, "logits_per_char": -0.5581624507904053, "num_chars": 2}, {"sum_logits": -1.4839448928833008, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.4839448928833008, "logits_per_char": -0.7419724464416504, "num_chars": 2}, {"sum_logits": -1.7638909816741943, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.7638909816741943, "logits_per_char": -0.8819454908370972, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.475791573524475, "incorrect_loss_raw": 1.41526464621226, "correct_loss_per_char": 0.7378957867622375, "incorrect_loss_per_char": 0.70763232310613, "correct_loss_per_token": 1.475791573524475, "incorrect_loss_per_token": 1.41526464621226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2841737270355225, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.2841737270355225, "logits_per_char": -0.6420868635177612, "num_chars": 2}, {"sum_logits": -1.1286201477050781, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.1286201477050781, "logits_per_char": -0.5643100738525391, "num_chars": 2}, {"sum_logits": -1.475791573524475, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.475791573524475, "logits_per_char": -0.7378957867622375, "num_chars": 2}, {"sum_logits": -1.8330000638961792, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.8330000638961792, "logits_per_char": -0.9165000319480896, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1681010723114014, "incorrect_loss_raw": 1.5082322359085083, "correct_loss_per_char": 0.5840505361557007, "incorrect_loss_per_char": 0.7541161179542542, "correct_loss_per_token": 1.1681010723114014, "incorrect_loss_per_token": 1.5082322359085083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4195905923843384, "num_tokens": 1, "num_tokens_all": 678, "is_greedy": false, "logits_per_token": -1.4195905923843384, "logits_per_char": -0.7097952961921692, "num_chars": 2}, {"sum_logits": -1.1681010723114014, "num_tokens": 1, "num_tokens_all": 678, "is_greedy": true, "logits_per_token": -1.1681010723114014, "logits_per_char": -0.5840505361557007, "num_chars": 2}, {"sum_logits": -1.3106402158737183, "num_tokens": 1, "num_tokens_all": 678, "is_greedy": false, "logits_per_token": -1.3106402158737183, "logits_per_char": -0.6553201079368591, "num_chars": 2}, {"sum_logits": -1.7944658994674683, "num_tokens": 1, "num_tokens_all": 678, "is_greedy": false, "logits_per_token": -1.7944658994674683, "logits_per_char": -0.8972329497337341, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4885501861572266, "incorrect_loss_raw": 1.4092549880345662, "correct_loss_per_char": 0.7442750930786133, "incorrect_loss_per_char": 0.7046274940172831, "correct_loss_per_token": 1.4885501861572266, "incorrect_loss_per_token": 1.4092549880345662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2929695844650269, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.2929695844650269, "logits_per_char": -0.6464847922325134, "num_chars": 2}, {"sum_logits": -1.1386570930480957, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.1386570930480957, "logits_per_char": -0.5693285465240479, "num_chars": 2}, {"sum_logits": -1.4885501861572266, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.4885501861572266, "logits_per_char": -0.7442750930786133, "num_chars": 2}, {"sum_logits": -1.7961382865905762, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.7961382865905762, "logits_per_char": -0.8980691432952881, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8085167407989502, "incorrect_loss_raw": 1.3195947408676147, "correct_loss_per_char": 0.9042583703994751, "incorrect_loss_per_char": 0.6597973704338074, "correct_loss_per_token": 1.8085167407989502, "incorrect_loss_per_token": 1.3195947408676147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2814581394195557, "num_tokens": 1, "num_tokens_all": 564, "is_greedy": false, "logits_per_token": -1.2814581394195557, "logits_per_char": -0.6407290697097778, "num_chars": 2}, {"sum_logits": -1.0648692846298218, "num_tokens": 1, "num_tokens_all": 564, "is_greedy": true, "logits_per_token": -1.0648692846298218, "logits_per_char": -0.5324346423149109, "num_chars": 2}, {"sum_logits": -1.6124567985534668, "num_tokens": 1, "num_tokens_all": 564, "is_greedy": false, "logits_per_token": -1.6124567985534668, "logits_per_char": -0.8062283992767334, "num_chars": 2}, {"sum_logits": -1.8085167407989502, "num_tokens": 1, "num_tokens_all": 564, "is_greedy": false, "logits_per_token": -1.8085167407989502, "logits_per_char": -0.9042583703994751, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.828489899635315, "incorrect_loss_raw": 1.3012855052947998, "correct_loss_per_char": 0.9142449498176575, "incorrect_loss_per_char": 0.6506427526473999, "correct_loss_per_token": 1.828489899635315, "incorrect_loss_per_token": 1.3012855052947998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1939393281936646, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.1939393281936646, "logits_per_char": -0.5969696640968323, "num_chars": 2}, {"sum_logits": -1.1904083490371704, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.1904083490371704, "logits_per_char": -0.5952041745185852, "num_chars": 2}, {"sum_logits": -1.5195088386535645, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.5195088386535645, "logits_per_char": -0.7597544193267822, "num_chars": 2}, {"sum_logits": -1.828489899635315, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.828489899635315, "logits_per_char": -0.9142449498176575, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4196580648422241, "incorrect_loss_raw": 1.428694208463033, "correct_loss_per_char": 0.7098290324211121, "incorrect_loss_per_char": 0.7143471042315165, "correct_loss_per_token": 1.4196580648422241, "incorrect_loss_per_token": 1.428694208463033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2961814403533936, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.2961814403533936, "logits_per_char": -0.6480907201766968, "num_chars": 2}, {"sum_logits": -1.2033803462982178, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.2033803462982178, "logits_per_char": -0.6016901731491089, "num_chars": 2}, {"sum_logits": -1.4196580648422241, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4196580648422241, "logits_per_char": -0.7098290324211121, "num_chars": 2}, {"sum_logits": -1.7865208387374878, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.7865208387374878, "logits_per_char": -0.8932604193687439, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7733829021453857, "incorrect_loss_raw": 1.3322913249333699, "correct_loss_per_char": 0.8866914510726929, "incorrect_loss_per_char": 0.6661456624666849, "correct_loss_per_token": 1.7733829021453857, "incorrect_loss_per_token": 1.3322913249333699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1491531133651733, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.1491531133651733, "logits_per_char": -0.5745765566825867, "num_chars": 2}, {"sum_logits": -1.1750518083572388, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.1750518083572388, "logits_per_char": -0.5875259041786194, "num_chars": 2}, {"sum_logits": -1.6726690530776978, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.6726690530776978, "logits_per_char": -0.8363345265388489, "num_chars": 2}, {"sum_logits": -1.7733829021453857, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.7733829021453857, "logits_per_char": -0.8866914510726929, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2216544151306152, "incorrect_loss_raw": 1.5103250741958618, "correct_loss_per_char": 0.6108272075653076, "incorrect_loss_per_char": 0.7551625370979309, "correct_loss_per_token": 1.2216544151306152, "incorrect_loss_per_token": 1.5103250741958618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2216544151306152, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.2216544151306152, "logits_per_char": -0.6108272075653076, "num_chars": 2}, {"sum_logits": -1.1338742971420288, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.1338742971420288, "logits_per_char": -0.5669371485710144, "num_chars": 2}, {"sum_logits": -1.557863712310791, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.557863712310791, "logits_per_char": -0.7789318561553955, "num_chars": 2}, {"sum_logits": -1.8392372131347656, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.8392372131347656, "logits_per_char": -0.9196186065673828, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4804131984710693, "incorrect_loss_raw": 1.4053815603256226, "correct_loss_per_char": 0.7402065992355347, "incorrect_loss_per_char": 0.7026907801628113, "correct_loss_per_token": 1.4804131984710693, "incorrect_loss_per_token": 1.4053815603256226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3414300680160522, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.3414300680160522, "logits_per_char": -0.6707150340080261, "num_chars": 2}, {"sum_logits": -1.1438008546829224, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": true, "logits_per_token": -1.1438008546829224, "logits_per_char": -0.5719004273414612, "num_chars": 2}, {"sum_logits": -1.4804131984710693, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4804131984710693, "logits_per_char": -0.7402065992355347, "num_chars": 2}, {"sum_logits": -1.730913758277893, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.730913758277893, "logits_per_char": -0.8654568791389465, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2114615440368652, "incorrect_loss_raw": 1.4853815635045369, "correct_loss_per_char": 0.6057307720184326, "incorrect_loss_per_char": 0.7426907817522684, "correct_loss_per_token": 1.2114615440368652, "incorrect_loss_per_token": 1.4853815635045369, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4290344715118408, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.4290344715118408, "logits_per_char": -0.7145172357559204, "num_chars": 2}, {"sum_logits": -1.2114615440368652, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": true, "logits_per_token": -1.2114615440368652, "logits_per_char": -0.6057307720184326, "num_chars": 2}, {"sum_logits": -1.3043302297592163, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.3043302297592163, "logits_per_char": -0.6521651148796082, "num_chars": 2}, {"sum_logits": -1.7227799892425537, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.7227799892425537, "logits_per_char": -0.8613899946212769, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1525840759277344, "incorrect_loss_raw": 1.509279449780782, "correct_loss_per_char": 0.5762920379638672, "incorrect_loss_per_char": 0.754639724890391, "correct_loss_per_token": 1.1525840759277344, "incorrect_loss_per_token": 1.509279449780782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.40631902217865, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.40631902217865, "logits_per_char": -0.703159511089325, "num_chars": 2}, {"sum_logits": -1.1525840759277344, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": true, "logits_per_token": -1.1525840759277344, "logits_per_char": -0.5762920379638672, "num_chars": 2}, {"sum_logits": -1.414840817451477, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.414840817451477, "logits_per_char": -0.7074204087257385, "num_chars": 2}, {"sum_logits": -1.7066785097122192, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.7066785097122192, "logits_per_char": -0.8533392548561096, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5273096561431885, "incorrect_loss_raw": 1.4181382656097412, "correct_loss_per_char": 0.7636548280715942, "incorrect_loss_per_char": 0.7090691328048706, "correct_loss_per_token": 1.5273096561431885, "incorrect_loss_per_token": 1.4181382656097412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.383056640625, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.383056640625, "logits_per_char": -0.6915283203125, "num_chars": 2}, {"sum_logits": -1.041377067565918, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": true, "logits_per_token": -1.041377067565918, "logits_per_char": -0.520688533782959, "num_chars": 2}, {"sum_logits": -1.5273096561431885, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.5273096561431885, "logits_per_char": -0.7636548280715942, "num_chars": 2}, {"sum_logits": -1.8299810886383057, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.8299810886383057, "logits_per_char": -0.9149905443191528, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.677335500717163, "incorrect_loss_raw": 1.361053228378296, "correct_loss_per_char": 0.8386677503585815, "incorrect_loss_per_char": 0.680526614189148, "correct_loss_per_token": 1.677335500717163, "incorrect_loss_per_token": 1.361053228378296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4819202423095703, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4819202423095703, "logits_per_char": -0.7409601211547852, "num_chars": 2}, {"sum_logits": -1.048020601272583, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": true, "logits_per_token": -1.048020601272583, "logits_per_char": -0.5240103006362915, "num_chars": 2}, {"sum_logits": -1.5532188415527344, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.5532188415527344, "logits_per_char": -0.7766094207763672, "num_chars": 2}, {"sum_logits": -1.677335500717163, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.677335500717163, "logits_per_char": -0.8386677503585815, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4192183017730713, "incorrect_loss_raw": 1.4166516462961833, "correct_loss_per_char": 0.7096091508865356, "incorrect_loss_per_char": 0.7083258231480917, "correct_loss_per_token": 1.4192183017730713, "incorrect_loss_per_token": 1.4166516462961833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4192183017730713, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.4192183017730713, "logits_per_char": -0.7096091508865356, "num_chars": 2}, {"sum_logits": -1.1649739742279053, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": true, "logits_per_token": -1.1649739742279053, "logits_per_char": -0.5824869871139526, "num_chars": 2}, {"sum_logits": -1.358526587486267, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.358526587486267, "logits_per_char": -0.6792632937431335, "num_chars": 2}, {"sum_logits": -1.7264543771743774, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.7264543771743774, "logits_per_char": -0.8632271885871887, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4380366802215576, "incorrect_loss_raw": 1.4244275490442913, "correct_loss_per_char": 0.7190183401107788, "incorrect_loss_per_char": 0.7122137745221456, "correct_loss_per_token": 1.4380366802215576, "incorrect_loss_per_token": 1.4244275490442913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4380366802215576, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.4380366802215576, "logits_per_char": -0.7190183401107788, "num_chars": 2}, {"sum_logits": -1.100579023361206, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": true, "logits_per_token": -1.100579023361206, "logits_per_char": -0.550289511680603, "num_chars": 2}, {"sum_logits": -1.5684453248977661, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.5684453248977661, "logits_per_char": -0.7842226624488831, "num_chars": 2}, {"sum_logits": -1.6042582988739014, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.6042582988739014, "logits_per_char": -0.8021291494369507, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5210139751434326, "incorrect_loss_raw": 1.4065203269322712, "correct_loss_per_char": 0.7605069875717163, "incorrect_loss_per_char": 0.7032601634661356, "correct_loss_per_token": 1.5210139751434326, "incorrect_loss_per_token": 1.4065203269322712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.274367332458496, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.274367332458496, "logits_per_char": -0.637183666229248, "num_chars": 2}, {"sum_logits": -1.1279829740524292, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.1279829740524292, "logits_per_char": -0.5639914870262146, "num_chars": 2}, {"sum_logits": -1.5210139751434326, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.5210139751434326, "logits_per_char": -0.7605069875717163, "num_chars": 2}, {"sum_logits": -1.8172106742858887, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.8172106742858887, "logits_per_char": -0.9086053371429443, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7954401969909668, "incorrect_loss_raw": 1.3041560252507527, "correct_loss_per_char": 0.8977200984954834, "incorrect_loss_per_char": 0.6520780126253763, "correct_loss_per_token": 1.7954401969909668, "incorrect_loss_per_token": 1.3041560252507527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3127349615097046, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.3127349615097046, "logits_per_char": -0.6563674807548523, "num_chars": 2}, {"sum_logits": -1.145845890045166, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.145845890045166, "logits_per_char": -0.572922945022583, "num_chars": 2}, {"sum_logits": -1.4538872241973877, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4538872241973877, "logits_per_char": -0.7269436120986938, "num_chars": 2}, {"sum_logits": -1.7954401969909668, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.7954401969909668, "logits_per_char": -0.8977200984954834, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7874016761779785, "incorrect_loss_raw": 1.3071763912836711, "correct_loss_per_char": 0.8937008380889893, "incorrect_loss_per_char": 0.6535881956418356, "correct_loss_per_token": 1.7874016761779785, "incorrect_loss_per_token": 1.3071763912836711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.283888339996338, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.283888339996338, "logits_per_char": -0.641944169998169, "num_chars": 2}, {"sum_logits": -1.1806597709655762, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -1.1806597709655762, "logits_per_char": -0.5903298854827881, "num_chars": 2}, {"sum_logits": -1.4569810628890991, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4569810628890991, "logits_per_char": -0.7284905314445496, "num_chars": 2}, {"sum_logits": -1.7874016761779785, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.7874016761779785, "logits_per_char": -0.8937008380889893, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4079489707946777, "incorrect_loss_raw": 1.4718974828720093, "correct_loss_per_char": 0.7039744853973389, "incorrect_loss_per_char": 0.7359487414360046, "correct_loss_per_token": 1.4079489707946777, "incorrect_loss_per_token": 1.4718974828720093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4079489707946777, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.4079489707946777, "logits_per_char": -0.7039744853973389, "num_chars": 2}, {"sum_logits": -0.9539972543716431, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": true, "logits_per_token": -0.9539972543716431, "logits_per_char": -0.47699862718582153, "num_chars": 2}, {"sum_logits": -1.5857563018798828, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.5857563018798828, "logits_per_char": -0.7928781509399414, "num_chars": 2}, {"sum_logits": -1.875938892364502, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.875938892364502, "logits_per_char": -0.937969446182251, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4398010969161987, "incorrect_loss_raw": 1.4294801553090413, "correct_loss_per_char": 0.7199005484580994, "incorrect_loss_per_char": 0.7147400776545206, "correct_loss_per_token": 1.4398010969161987, "incorrect_loss_per_token": 1.4294801553090413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3143922090530396, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.3143922090530396, "logits_per_char": -0.6571961045265198, "num_chars": 2}, {"sum_logits": -1.1513665914535522, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": true, "logits_per_token": -1.1513665914535522, "logits_per_char": -0.5756832957267761, "num_chars": 2}, {"sum_logits": -1.4398010969161987, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.4398010969161987, "logits_per_char": -0.7199005484580994, "num_chars": 2}, {"sum_logits": -1.8226816654205322, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.8226816654205322, "logits_per_char": -0.9113408327102661, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0533509254455566, "incorrect_loss_raw": 1.5845719178517659, "correct_loss_per_char": 0.5266754627227783, "incorrect_loss_per_char": 0.7922859589258829, "correct_loss_per_token": 1.0533509254455566, "incorrect_loss_per_token": 1.5845719178517659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.313805341720581, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.313805341720581, "logits_per_char": -0.6569026708602905, "num_chars": 2}, {"sum_logits": -1.0533509254455566, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": true, "logits_per_token": -1.0533509254455566, "logits_per_char": -0.5266754627227783, "num_chars": 2}, {"sum_logits": -1.5008209943771362, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.5008209943771362, "logits_per_char": -0.7504104971885681, "num_chars": 2}, {"sum_logits": -1.9390894174575806, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.9390894174575806, "logits_per_char": -0.9695447087287903, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3851666450500488, "incorrect_loss_raw": 1.4230977296829224, "correct_loss_per_char": 0.6925833225250244, "incorrect_loss_per_char": 0.7115488648414612, "correct_loss_per_token": 1.3851666450500488, "incorrect_loss_per_token": 1.4230977296829224, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3851666450500488, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.3851666450500488, "logits_per_char": -0.6925833225250244, "num_chars": 2}, {"sum_logits": -1.1967670917510986, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": true, "logits_per_token": -1.1967670917510986, "logits_per_char": -0.5983835458755493, "num_chars": 2}, {"sum_logits": -1.3688253164291382, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.3688253164291382, "logits_per_char": -0.6844126582145691, "num_chars": 2}, {"sum_logits": -1.7037007808685303, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.7037007808685303, "logits_per_char": -0.8518503904342651, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.973727822303772, "incorrect_loss_raw": 1.2915452718734741, "correct_loss_per_char": 0.986863911151886, "incorrect_loss_per_char": 0.6457726359367371, "correct_loss_per_token": 1.973727822303772, "incorrect_loss_per_token": 1.2915452718734741, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0219311714172363, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": true, "logits_per_token": -1.0219311714172363, "logits_per_char": -0.5109655857086182, "num_chars": 2}, {"sum_logits": -1.3765223026275635, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.3765223026275635, "logits_per_char": -0.6882611513137817, "num_chars": 2}, {"sum_logits": -1.4761823415756226, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.4761823415756226, "logits_per_char": -0.7380911707878113, "num_chars": 2}, {"sum_logits": -1.973727822303772, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.973727822303772, "logits_per_char": -0.986863911151886, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.833831548690796, "incorrect_loss_raw": 1.2984768549601238, "correct_loss_per_char": 0.916915774345398, "incorrect_loss_per_char": 0.6492384274800619, "correct_loss_per_token": 1.833831548690796, "incorrect_loss_per_token": 1.2984768549601238, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2822133302688599, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.2822133302688599, "logits_per_char": -0.6411066651344299, "num_chars": 2}, {"sum_logits": -1.1130679845809937, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.1130679845809937, "logits_per_char": -0.5565339922904968, "num_chars": 2}, {"sum_logits": -1.5001492500305176, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.5001492500305176, "logits_per_char": -0.7500746250152588, "num_chars": 2}, {"sum_logits": -1.833831548690796, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.833831548690796, "logits_per_char": -0.916915774345398, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1855285167694092, "incorrect_loss_raw": 1.542764941851298, "correct_loss_per_char": 0.5927642583847046, "incorrect_loss_per_char": 0.771382470925649, "correct_loss_per_token": 1.1855285167694092, "incorrect_loss_per_token": 1.542764941851298, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1855285167694092, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.1855285167694092, "logits_per_char": -0.5927642583847046, "num_chars": 2}, {"sum_logits": -1.0797737836837769, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": true, "logits_per_token": -1.0797737836837769, "logits_per_char": -0.5398868918418884, "num_chars": 2}, {"sum_logits": -1.6297624111175537, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.6297624111175537, "logits_per_char": -0.8148812055587769, "num_chars": 2}, {"sum_logits": -1.9187586307525635, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.9187586307525635, "logits_per_char": -0.9593793153762817, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8684852123260498, "incorrect_loss_raw": 1.311954418818156, "correct_loss_per_char": 0.9342426061630249, "incorrect_loss_per_char": 0.655977209409078, "correct_loss_per_token": 1.8684852123260498, "incorrect_loss_per_token": 1.311954418818156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2785828113555908, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.2785828113555908, "logits_per_char": -0.6392914056777954, "num_chars": 2}, {"sum_logits": -1.118260145187378, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": true, "logits_per_token": -1.118260145187378, "logits_per_char": -0.559130072593689, "num_chars": 2}, {"sum_logits": -1.539020299911499, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.539020299911499, "logits_per_char": -0.7695101499557495, "num_chars": 2}, {"sum_logits": -1.8684852123260498, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.8684852123260498, "logits_per_char": -0.9342426061630249, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4235881567001343, "incorrect_loss_raw": 1.4407325585683186, "correct_loss_per_char": 0.7117940783500671, "incorrect_loss_per_char": 0.7203662792841593, "correct_loss_per_token": 1.4235881567001343, "incorrect_loss_per_token": 1.4407325585683186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4533942937850952, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.4533942937850952, "logits_per_char": -0.7266971468925476, "num_chars": 2}, {"sum_logits": -1.0461838245391846, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": true, "logits_per_token": -1.0461838245391846, "logits_per_char": -0.5230919122695923, "num_chars": 2}, {"sum_logits": -1.4235881567001343, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.4235881567001343, "logits_per_char": -0.7117940783500671, "num_chars": 2}, {"sum_logits": -1.8226195573806763, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.8226195573806763, "logits_per_char": -0.9113097786903381, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8340904712677002, "incorrect_loss_raw": 1.312909682591756, "correct_loss_per_char": 0.9170452356338501, "incorrect_loss_per_char": 0.656454841295878, "correct_loss_per_token": 1.8340904712677002, "incorrect_loss_per_token": 1.312909682591756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5032000541687012, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.5032000541687012, "logits_per_char": -0.7516000270843506, "num_chars": 2}, {"sum_logits": -1.1682829856872559, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.1682829856872559, "logits_per_char": -0.5841414928436279, "num_chars": 2}, {"sum_logits": -1.2672460079193115, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.2672460079193115, "logits_per_char": -0.6336230039596558, "num_chars": 2}, {"sum_logits": -1.8340904712677002, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.8340904712677002, "logits_per_char": -0.9170452356338501, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6372103691101074, "incorrect_loss_raw": 1.3362020254135132, "correct_loss_per_char": 0.8186051845550537, "incorrect_loss_per_char": 0.6681010127067566, "correct_loss_per_token": 1.6372103691101074, "incorrect_loss_per_token": 1.3362020254135132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3202414512634277, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.3202414512634277, "logits_per_char": -0.6601207256317139, "num_chars": 2}, {"sum_logits": -1.2733190059661865, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.2733190059661865, "logits_per_char": -0.6366595029830933, "num_chars": 2}, {"sum_logits": -1.4150456190109253, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.4150456190109253, "logits_per_char": -0.7075228095054626, "num_chars": 2}, {"sum_logits": -1.6372103691101074, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.6372103691101074, "logits_per_char": -0.8186051845550537, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.485413908958435, "incorrect_loss_raw": 1.4434268077214558, "correct_loss_per_char": 0.7427069544792175, "incorrect_loss_per_char": 0.7217134038607279, "correct_loss_per_token": 1.485413908958435, "incorrect_loss_per_token": 1.4434268077214558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.146256685256958, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.146256685256958, "logits_per_char": -0.573128342628479, "num_chars": 2}, {"sum_logits": -1.2200838327407837, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.2200838327407837, "logits_per_char": -0.6100419163703918, "num_chars": 2}, {"sum_logits": -1.485413908958435, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.485413908958435, "logits_per_char": -0.7427069544792175, "num_chars": 2}, {"sum_logits": -1.963939905166626, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.963939905166626, "logits_per_char": -0.981969952583313, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7870663404464722, "incorrect_loss_raw": 1.30225666364034, "correct_loss_per_char": 0.8935331702232361, "incorrect_loss_per_char": 0.65112833182017, "correct_loss_per_token": 1.7870663404464722, "incorrect_loss_per_token": 1.30225666364034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.271189570426941, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.271189570426941, "logits_per_char": -0.6355947852134705, "num_chars": 2}, {"sum_logits": -1.2685883045196533, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.2685883045196533, "logits_per_char": -0.6342941522598267, "num_chars": 2}, {"sum_logits": -1.3669921159744263, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.3669921159744263, "logits_per_char": -0.6834960579872131, "num_chars": 2}, {"sum_logits": -1.7870663404464722, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.7870663404464722, "logits_per_char": -0.8935331702232361, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7275762557983398, "incorrect_loss_raw": 1.3209373156229656, "correct_loss_per_char": 0.8637881278991699, "incorrect_loss_per_char": 0.6604686578114828, "correct_loss_per_token": 1.7275762557983398, "incorrect_loss_per_token": 1.3209373156229656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3601224422454834, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.3601224422454834, "logits_per_char": -0.6800612211227417, "num_chars": 2}, {"sum_logits": -1.1425011157989502, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": true, "logits_per_token": -1.1425011157989502, "logits_per_char": -0.5712505578994751, "num_chars": 2}, {"sum_logits": -1.460188388824463, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.460188388824463, "logits_per_char": -0.7300941944122314, "num_chars": 2}, {"sum_logits": -1.7275762557983398, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.7275762557983398, "logits_per_char": -0.8637881278991699, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8041425943374634, "incorrect_loss_raw": 1.3030364513397217, "correct_loss_per_char": 0.9020712971687317, "incorrect_loss_per_char": 0.6515182256698608, "correct_loss_per_token": 1.8041425943374634, "incorrect_loss_per_token": 1.3030364513397217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3374459743499756, "num_tokens": 1, "num_tokens_all": 611, "is_greedy": false, "logits_per_token": -1.3374459743499756, "logits_per_char": -0.6687229871749878, "num_chars": 2}, {"sum_logits": -1.113700032234192, "num_tokens": 1, "num_tokens_all": 611, "is_greedy": true, "logits_per_token": -1.113700032234192, "logits_per_char": -0.556850016117096, "num_chars": 2}, {"sum_logits": -1.4579633474349976, "num_tokens": 1, "num_tokens_all": 611, "is_greedy": false, "logits_per_token": -1.4579633474349976, "logits_per_char": -0.7289816737174988, "num_chars": 2}, {"sum_logits": -1.8041425943374634, "num_tokens": 1, "num_tokens_all": 611, "is_greedy": false, "logits_per_token": -1.8041425943374634, "logits_per_char": -0.9020712971687317, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1922736167907715, "incorrect_loss_raw": 1.5137840906778972, "correct_loss_per_char": 0.5961368083953857, "incorrect_loss_per_char": 0.7568920453389486, "correct_loss_per_token": 1.1922736167907715, "incorrect_loss_per_token": 1.5137840906778972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2607485055923462, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.2607485055923462, "logits_per_char": -0.6303742527961731, "num_chars": 2}, {"sum_logits": -1.1922736167907715, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": true, "logits_per_token": -1.1922736167907715, "logits_per_char": -0.5961368083953857, "num_chars": 2}, {"sum_logits": -1.4604707956314087, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.4604707956314087, "logits_per_char": -0.7302353978157043, "num_chars": 2}, {"sum_logits": -1.8201329708099365, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.8201329708099365, "logits_per_char": -0.9100664854049683, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3996084928512573, "incorrect_loss_raw": 1.4235410292943318, "correct_loss_per_char": 0.6998042464256287, "incorrect_loss_per_char": 0.7117705146471659, "correct_loss_per_token": 1.3996084928512573, "incorrect_loss_per_token": 1.4235410292943318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3996084928512573, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.3996084928512573, "logits_per_char": -0.6998042464256287, "num_chars": 2}, {"sum_logits": -1.130361557006836, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.130361557006836, "logits_per_char": -0.565180778503418, "num_chars": 2}, {"sum_logits": -1.521626353263855, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.521626353263855, "logits_per_char": -0.7608131766319275, "num_chars": 2}, {"sum_logits": -1.6186351776123047, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.6186351776123047, "logits_per_char": -0.8093175888061523, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4451448917388916, "incorrect_loss_raw": 1.3950633605321248, "correct_loss_per_char": 0.7225724458694458, "incorrect_loss_per_char": 0.6975316802660624, "correct_loss_per_token": 1.4451448917388916, "incorrect_loss_per_token": 1.3950633605321248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5316218137741089, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.5316218137741089, "logits_per_char": -0.7658109068870544, "num_chars": 2}, {"sum_logits": -1.2800922393798828, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.2800922393798828, "logits_per_char": -0.6400461196899414, "num_chars": 2}, {"sum_logits": -1.4451448917388916, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.4451448917388916, "logits_per_char": -0.7225724458694458, "num_chars": 2}, {"sum_logits": -1.3734760284423828, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.3734760284423828, "logits_per_char": -0.6867380142211914, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5091394186019897, "incorrect_loss_raw": 1.413286566734314, "correct_loss_per_char": 0.7545697093009949, "incorrect_loss_per_char": 0.706643283367157, "correct_loss_per_token": 1.5091394186019897, "incorrect_loss_per_token": 1.413286566734314, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1944276094436646, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.1944276094436646, "logits_per_char": -0.5972138047218323, "num_chars": 2}, {"sum_logits": -1.1841504573822021, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.1841504573822021, "logits_per_char": -0.5920752286911011, "num_chars": 2}, {"sum_logits": -1.5091394186019897, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.5091394186019897, "logits_per_char": -0.7545697093009949, "num_chars": 2}, {"sum_logits": -1.8612816333770752, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.8612816333770752, "logits_per_char": -0.9306408166885376, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1596202850341797, "incorrect_loss_raw": 1.6095206340154011, "correct_loss_per_char": 0.5798101425170898, "incorrect_loss_per_char": 0.8047603170077006, "correct_loss_per_token": 1.1596202850341797, "incorrect_loss_per_token": 1.6095206340154011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0749415159225464, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": true, "logits_per_token": -1.0749415159225464, "logits_per_char": -0.5374707579612732, "num_chars": 2}, {"sum_logits": -1.1596202850341797, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -1.1596202850341797, "logits_per_char": -0.5798101425170898, "num_chars": 2}, {"sum_logits": -1.4919724464416504, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -1.4919724464416504, "logits_per_char": -0.7459862232208252, "num_chars": 2}, {"sum_logits": -2.261647939682007, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -2.261647939682007, "logits_per_char": -1.1308239698410034, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1922545433044434, "incorrect_loss_raw": 1.542373816172282, "correct_loss_per_char": 0.5961272716522217, "incorrect_loss_per_char": 0.771186908086141, "correct_loss_per_token": 1.1922545433044434, "incorrect_loss_per_token": 1.542373816172282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2633581161499023, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.2633581161499023, "logits_per_char": -0.6316790580749512, "num_chars": 2}, {"sum_logits": -1.1922545433044434, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.1922545433044434, "logits_per_char": -0.5961272716522217, "num_chars": 2}, {"sum_logits": -1.3608551025390625, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.3608551025390625, "logits_per_char": -0.6804275512695312, "num_chars": 2}, {"sum_logits": -2.002908229827881, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -2.002908229827881, "logits_per_char": -1.0014541149139404, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1153864860534668, "incorrect_loss_raw": 1.547511339187622, "correct_loss_per_char": 0.5576932430267334, "incorrect_loss_per_char": 0.773755669593811, "correct_loss_per_token": 1.1153864860534668, "incorrect_loss_per_token": 1.547511339187622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3336846828460693, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.3336846828460693, "logits_per_char": -0.6668423414230347, "num_chars": 2}, {"sum_logits": -1.1153864860534668, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": true, "logits_per_token": -1.1153864860534668, "logits_per_char": -0.5576932430267334, "num_chars": 2}, {"sum_logits": -1.4933589696884155, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.4933589696884155, "logits_per_char": -0.7466794848442078, "num_chars": 2}, {"sum_logits": -1.8154903650283813, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.8154903650283813, "logits_per_char": -0.9077451825141907, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.592706322669983, "incorrect_loss_raw": 1.4098604122797649, "correct_loss_per_char": 0.7963531613349915, "incorrect_loss_per_char": 0.7049302061398824, "correct_loss_per_token": 1.592706322669983, "incorrect_loss_per_token": 1.4098604122797649, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1162720918655396, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": true, "logits_per_token": -1.1162720918655396, "logits_per_char": -0.5581360459327698, "num_chars": 2}, {"sum_logits": -1.1945769786834717, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.1945769786834717, "logits_per_char": -0.5972884893417358, "num_chars": 2}, {"sum_logits": -1.592706322669983, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.592706322669983, "logits_per_char": -0.7963531613349915, "num_chars": 2}, {"sum_logits": -1.9187321662902832, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.9187321662902832, "logits_per_char": -0.9593660831451416, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2538352012634277, "incorrect_loss_raw": 1.4827612241109211, "correct_loss_per_char": 0.6269176006317139, "incorrect_loss_per_char": 0.7413806120554606, "correct_loss_per_token": 1.2538352012634277, "incorrect_loss_per_token": 1.4827612241109211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.268471598625183, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.268471598625183, "logits_per_char": -0.6342357993125916, "num_chars": 2}, {"sum_logits": -1.2538352012634277, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.2538352012634277, "logits_per_char": -0.6269176006317139, "num_chars": 2}, {"sum_logits": -1.3967797756195068, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.3967797756195068, "logits_per_char": -0.6983898878097534, "num_chars": 2}, {"sum_logits": -1.7830322980880737, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.7830322980880737, "logits_per_char": -0.8915161490440369, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.738129734992981, "incorrect_loss_raw": 1.3372552792231243, "correct_loss_per_char": 0.8690648674964905, "incorrect_loss_per_char": 0.6686276396115621, "correct_loss_per_token": 1.738129734992981, "incorrect_loss_per_token": 1.3372552792231243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.327396035194397, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.327396035194397, "logits_per_char": -0.6636980175971985, "num_chars": 2}, {"sum_logits": -1.1140491962432861, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": true, "logits_per_token": -1.1140491962432861, "logits_per_char": -0.5570245981216431, "num_chars": 2}, {"sum_logits": -1.5703206062316895, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.5703206062316895, "logits_per_char": -0.7851603031158447, "num_chars": 2}, {"sum_logits": -1.738129734992981, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.738129734992981, "logits_per_char": -0.8690648674964905, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5016266107559204, "incorrect_loss_raw": 1.4140228430430095, "correct_loss_per_char": 0.7508133053779602, "incorrect_loss_per_char": 0.7070114215215048, "correct_loss_per_token": 1.5016266107559204, "incorrect_loss_per_token": 1.4140228430430095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3536324501037598, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.3536324501037598, "logits_per_char": -0.6768162250518799, "num_chars": 2}, {"sum_logits": -1.0676339864730835, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": true, "logits_per_token": -1.0676339864730835, "logits_per_char": -0.5338169932365417, "num_chars": 2}, {"sum_logits": -1.5016266107559204, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.5016266107559204, "logits_per_char": -0.7508133053779602, "num_chars": 2}, {"sum_logits": -1.820802092552185, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.820802092552185, "logits_per_char": -0.9104010462760925, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4867916107177734, "incorrect_loss_raw": 1.3731477657953899, "correct_loss_per_char": 0.7433958053588867, "incorrect_loss_per_char": 0.6865738828976949, "correct_loss_per_token": 1.4867916107177734, "incorrect_loss_per_token": 1.3731477657953899, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.396165132522583, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.396165132522583, "logits_per_char": -0.6980825662612915, "num_chars": 2}, {"sum_logits": -1.3188717365264893, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": true, "logits_per_token": -1.3188717365264893, "logits_per_char": -0.6594358682632446, "num_chars": 2}, {"sum_logits": -1.4044064283370972, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.4044064283370972, "logits_per_char": -0.7022032141685486, "num_chars": 2}, {"sum_logits": -1.4867916107177734, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.4867916107177734, "logits_per_char": -0.7433958053588867, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5010079145431519, "incorrect_loss_raw": 1.4287246068318684, "correct_loss_per_char": 0.7505039572715759, "incorrect_loss_per_char": 0.7143623034159342, "correct_loss_per_token": 1.5010079145431519, "incorrect_loss_per_token": 1.4287246068318684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1710134744644165, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.1710134744644165, "logits_per_char": -0.5855067372322083, "num_chars": 2}, {"sum_logits": -1.1808907985687256, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.1808907985687256, "logits_per_char": -0.5904453992843628, "num_chars": 2}, {"sum_logits": -1.5010079145431519, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.5010079145431519, "logits_per_char": -0.7505039572715759, "num_chars": 2}, {"sum_logits": -1.9342695474624634, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.9342695474624634, "logits_per_char": -0.9671347737312317, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6835182905197144, "incorrect_loss_raw": 1.3740817705790203, "correct_loss_per_char": 0.8417591452598572, "incorrect_loss_per_char": 0.6870408852895101, "correct_loss_per_token": 1.6835182905197144, "incorrect_loss_per_token": 1.3740817705790203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2765376567840576, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.2765376567840576, "logits_per_char": -0.6382688283920288, "num_chars": 2}, {"sum_logits": -1.027174472808838, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": true, "logits_per_token": -1.027174472808838, "logits_per_char": -0.513587236404419, "num_chars": 2}, {"sum_logits": -1.6835182905197144, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.6835182905197144, "logits_per_char": -0.8417591452598572, "num_chars": 2}, {"sum_logits": -1.818533182144165, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.818533182144165, "logits_per_char": -0.9092665910720825, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2485933303833008, "incorrect_loss_raw": 1.4924416939417522, "correct_loss_per_char": 0.6242966651916504, "incorrect_loss_per_char": 0.7462208469708761, "correct_loss_per_token": 1.2485933303833008, "incorrect_loss_per_token": 1.4924416939417522, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2485933303833008, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.2485933303833008, "logits_per_char": -0.6242966651916504, "num_chars": 2}, {"sum_logits": -1.1956192255020142, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": true, "logits_per_token": -1.1956192255020142, "logits_per_char": -0.5978096127510071, "num_chars": 2}, {"sum_logits": -1.4287739992141724, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.4287739992141724, "logits_per_char": -0.7143869996070862, "num_chars": 2}, {"sum_logits": -1.8529318571090698, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.8529318571090698, "logits_per_char": -0.9264659285545349, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.457237720489502, "incorrect_loss_raw": 1.4274555444717407, "correct_loss_per_char": 0.728618860244751, "incorrect_loss_per_char": 0.7137277722358704, "correct_loss_per_token": 1.457237720489502, "incorrect_loss_per_token": 1.4274555444717407, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.457237720489502, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.457237720489502, "logits_per_char": -0.728618860244751, "num_chars": 2}, {"sum_logits": -1.0566774606704712, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": true, "logits_per_token": -1.0566774606704712, "logits_per_char": -0.5283387303352356, "num_chars": 2}, {"sum_logits": -1.4388048648834229, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.4388048648834229, "logits_per_char": -0.7194024324417114, "num_chars": 2}, {"sum_logits": -1.7868843078613281, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.7868843078613281, "logits_per_char": -0.8934421539306641, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1260840892791748, "incorrect_loss_raw": 1.5431288878122966, "correct_loss_per_char": 0.5630420446395874, "incorrect_loss_per_char": 0.7715644439061483, "correct_loss_per_token": 1.1260840892791748, "incorrect_loss_per_token": 1.5431288878122966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2782635688781738, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.2782635688781738, "logits_per_char": -0.6391317844390869, "num_chars": 2}, {"sum_logits": -1.1260840892791748, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.1260840892791748, "logits_per_char": -0.5630420446395874, "num_chars": 2}, {"sum_logits": -1.4561474323272705, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4561474323272705, "logits_per_char": -0.7280737161636353, "num_chars": 2}, {"sum_logits": -1.8949756622314453, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.8949756622314453, "logits_per_char": -0.9474878311157227, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7331430912017822, "incorrect_loss_raw": 1.4060176213582356, "correct_loss_per_char": 0.8665715456008911, "incorrect_loss_per_char": 0.7030088106791178, "correct_loss_per_token": 1.7331430912017822, "incorrect_loss_per_token": 1.4060176213582356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.064112663269043, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": true, "logits_per_token": -1.064112663269043, "logits_per_char": -0.5320563316345215, "num_chars": 2}, {"sum_logits": -1.116931676864624, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.116931676864624, "logits_per_char": -0.558465838432312, "num_chars": 2}, {"sum_logits": -1.7331430912017822, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.7331430912017822, "logits_per_char": -0.8665715456008911, "num_chars": 2}, {"sum_logits": -2.03700852394104, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -2.03700852394104, "logits_per_char": -1.01850426197052, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3770583868026733, "incorrect_loss_raw": 1.437790075937907, "correct_loss_per_char": 0.6885291934013367, "incorrect_loss_per_char": 0.7188950379689535, "correct_loss_per_token": 1.3770583868026733, "incorrect_loss_per_token": 1.437790075937907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.225223422050476, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": true, "logits_per_token": -1.225223422050476, "logits_per_char": -0.612611711025238, "num_chars": 2}, {"sum_logits": -1.2849699258804321, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.2849699258804321, "logits_per_char": -0.6424849629402161, "num_chars": 2}, {"sum_logits": -1.3770583868026733, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.3770583868026733, "logits_per_char": -0.6885291934013367, "num_chars": 2}, {"sum_logits": -1.8031768798828125, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.8031768798828125, "logits_per_char": -0.9015884399414062, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5926706790924072, "incorrect_loss_raw": 1.4005074898401897, "correct_loss_per_char": 0.7963353395462036, "incorrect_loss_per_char": 0.7002537449200948, "correct_loss_per_token": 1.5926706790924072, "incorrect_loss_per_token": 1.4005074898401897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.264812707901001, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.264812707901001, "logits_per_char": -0.6324063539505005, "num_chars": 2}, {"sum_logits": -1.1235579252243042, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": true, "logits_per_token": -1.1235579252243042, "logits_per_char": -0.5617789626121521, "num_chars": 2}, {"sum_logits": -1.5926706790924072, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.5926706790924072, "logits_per_char": -0.7963353395462036, "num_chars": 2}, {"sum_logits": -1.8131518363952637, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.8131518363952637, "logits_per_char": -0.9065759181976318, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4898149967193604, "incorrect_loss_raw": 1.3989446957906086, "correct_loss_per_char": 0.7449074983596802, "incorrect_loss_per_char": 0.6994723478953043, "correct_loss_per_token": 1.4898149967193604, "incorrect_loss_per_token": 1.3989446957906086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4898149967193604, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.4898149967193604, "logits_per_char": -0.7449074983596802, "num_chars": 2}, {"sum_logits": -1.0978376865386963, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": true, "logits_per_token": -1.0978376865386963, "logits_per_char": -0.5489188432693481, "num_chars": 2}, {"sum_logits": -1.469578504562378, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.469578504562378, "logits_per_char": -0.734789252281189, "num_chars": 2}, {"sum_logits": -1.629417896270752, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.629417896270752, "logits_per_char": -0.814708948135376, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.374167799949646, "incorrect_loss_raw": 1.4595327377319336, "correct_loss_per_char": 0.687083899974823, "incorrect_loss_per_char": 0.7297663688659668, "correct_loss_per_token": 1.374167799949646, "incorrect_loss_per_token": 1.4595327377319336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.374167799949646, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.374167799949646, "logits_per_char": -0.687083899974823, "num_chars": 2}, {"sum_logits": -1.09879732131958, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": true, "logits_per_token": -1.09879732131958, "logits_per_char": -0.54939866065979, "num_chars": 2}, {"sum_logits": -1.3890948295593262, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.3890948295593262, "logits_per_char": -0.6945474147796631, "num_chars": 2}, {"sum_logits": -1.8907060623168945, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.8907060623168945, "logits_per_char": -0.9453530311584473, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4004402160644531, "incorrect_loss_raw": 1.4524595737457275, "correct_loss_per_char": 0.7002201080322266, "incorrect_loss_per_char": 0.7262297868728638, "correct_loss_per_token": 1.4004402160644531, "incorrect_loss_per_token": 1.4524595737457275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4004402160644531, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.4004402160644531, "logits_per_char": -0.7002201080322266, "num_chars": 2}, {"sum_logits": -1.0233094692230225, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.0233094692230225, "logits_per_char": -0.5116547346115112, "num_chars": 2}, {"sum_logits": -1.5475966930389404, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5475966930389404, "logits_per_char": -0.7737983465194702, "num_chars": 2}, {"sum_logits": -1.7864725589752197, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.7864725589752197, "logits_per_char": -0.8932362794876099, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2326109409332275, "incorrect_loss_raw": 1.518454670906067, "correct_loss_per_char": 0.6163054704666138, "incorrect_loss_per_char": 0.7592273354530334, "correct_loss_per_token": 1.2326109409332275, "incorrect_loss_per_token": 1.518454670906067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2326109409332275, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.2326109409332275, "logits_per_char": -0.6163054704666138, "num_chars": 2}, {"sum_logits": -1.1054459810256958, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": true, "logits_per_token": -1.1054459810256958, "logits_per_char": -0.5527229905128479, "num_chars": 2}, {"sum_logits": -1.5505704879760742, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.5505704879760742, "logits_per_char": -0.7752852439880371, "num_chars": 2}, {"sum_logits": -1.8993475437164307, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.8993475437164307, "logits_per_char": -0.9496737718582153, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.52362060546875, "incorrect_loss_raw": 1.4059962431589763, "correct_loss_per_char": 0.761810302734375, "incorrect_loss_per_char": 0.7029981215794882, "correct_loss_per_token": 1.52362060546875, "incorrect_loss_per_token": 1.4059962431589763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4357943534851074, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.4357943534851074, "logits_per_char": -0.7178971767425537, "num_chars": 2}, {"sum_logits": -1.0357122421264648, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.0357122421264648, "logits_per_char": -0.5178561210632324, "num_chars": 2}, {"sum_logits": -1.52362060546875, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.52362060546875, "logits_per_char": -0.761810302734375, "num_chars": 2}, {"sum_logits": -1.7464821338653564, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.7464821338653564, "logits_per_char": -0.8732410669326782, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1939666271209717, "incorrect_loss_raw": 1.5044515132904053, "correct_loss_per_char": 0.5969833135604858, "incorrect_loss_per_char": 0.7522257566452026, "correct_loss_per_token": 1.1939666271209717, "incorrect_loss_per_token": 1.5044515132904053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3027583360671997, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.3027583360671997, "logits_per_char": -0.6513791680335999, "num_chars": 2}, {"sum_logits": -1.1939666271209717, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -1.1939666271209717, "logits_per_char": -0.5969833135604858, "num_chars": 2}, {"sum_logits": -1.43007230758667, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.43007230758667, "logits_per_char": -0.715036153793335, "num_chars": 2}, {"sum_logits": -1.7805238962173462, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.7805238962173462, "logits_per_char": -0.8902619481086731, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.594058632850647, "incorrect_loss_raw": 1.3550732930501301, "correct_loss_per_char": 0.7970293164253235, "incorrect_loss_per_char": 0.6775366465250651, "correct_loss_per_token": 1.594058632850647, "incorrect_loss_per_token": 1.3550732930501301, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.358309268951416, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.358309268951416, "logits_per_char": -0.679154634475708, "num_chars": 2}, {"sum_logits": -1.288031816482544, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.288031816482544, "logits_per_char": -0.644015908241272, "num_chars": 2}, {"sum_logits": -1.594058632850647, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.594058632850647, "logits_per_char": -0.7970293164253235, "num_chars": 2}, {"sum_logits": -1.4188787937164307, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.4188787937164307, "logits_per_char": -0.7094393968582153, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7027587890625, "incorrect_loss_raw": 1.3314669926961262, "correct_loss_per_char": 0.85137939453125, "incorrect_loss_per_char": 0.6657334963480631, "correct_loss_per_token": 1.7027587890625, "incorrect_loss_per_token": 1.3314669926961262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3907136917114258, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.3907136917114258, "logits_per_char": -0.6953568458557129, "num_chars": 2}, {"sum_logits": -1.1478559970855713, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.1478559970855713, "logits_per_char": -0.5739279985427856, "num_chars": 2}, {"sum_logits": -1.4558312892913818, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4558312892913818, "logits_per_char": -0.7279156446456909, "num_chars": 2}, {"sum_logits": -1.7027587890625, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.7027587890625, "logits_per_char": -0.85137939453125, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7820416688919067, "incorrect_loss_raw": 1.312366525332133, "correct_loss_per_char": 0.8910208344459534, "incorrect_loss_per_char": 0.6561832626660665, "correct_loss_per_token": 1.7820416688919067, "incorrect_loss_per_token": 1.312366525332133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1809138059616089, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": true, "logits_per_token": -1.1809138059616089, "logits_per_char": -0.5904569029808044, "num_chars": 2}, {"sum_logits": -1.2297345399856567, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.2297345399856567, "logits_per_char": -0.6148672699928284, "num_chars": 2}, {"sum_logits": -1.5264512300491333, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.5264512300491333, "logits_per_char": -0.7632256150245667, "num_chars": 2}, {"sum_logits": -1.7820416688919067, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.7820416688919067, "logits_per_char": -0.8910208344459534, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1164095401763916, "incorrect_loss_raw": 1.5286232233047485, "correct_loss_per_char": 0.5582047700881958, "incorrect_loss_per_char": 0.7643116116523743, "correct_loss_per_token": 1.1164095401763916, "incorrect_loss_per_token": 1.5286232233047485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3362261056900024, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.3362261056900024, "logits_per_char": -0.6681130528450012, "num_chars": 2}, {"sum_logits": -1.1164095401763916, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.1164095401763916, "logits_per_char": -0.5582047700881958, "num_chars": 2}, {"sum_logits": -1.5389509201049805, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5389509201049805, "logits_per_char": -0.7694754600524902, "num_chars": 2}, {"sum_logits": -1.7106926441192627, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.7106926441192627, "logits_per_char": -0.8553463220596313, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5038821697235107, "incorrect_loss_raw": 1.3839060068130493, "correct_loss_per_char": 0.7519410848617554, "incorrect_loss_per_char": 0.6919530034065247, "correct_loss_per_token": 1.5038821697235107, "incorrect_loss_per_token": 1.3839060068130493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5038821697235107, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.5038821697235107, "logits_per_char": -0.7519410848617554, "num_chars": 2}, {"sum_logits": -1.1837866306304932, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": true, "logits_per_token": -1.1837866306304932, "logits_per_char": -0.5918933153152466, "num_chars": 2}, {"sum_logits": -1.364281177520752, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.364281177520752, "logits_per_char": -0.682140588760376, "num_chars": 2}, {"sum_logits": -1.6036502122879028, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.6036502122879028, "logits_per_char": -0.8018251061439514, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6633027791976929, "incorrect_loss_raw": 1.3503487507502239, "correct_loss_per_char": 0.8316513895988464, "incorrect_loss_per_char": 0.6751743753751119, "correct_loss_per_token": 1.6633027791976929, "incorrect_loss_per_token": 1.3503487507502239, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3891291618347168, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.3891291618347168, "logits_per_char": -0.6945645809173584, "num_chars": 2}, {"sum_logits": -1.1216269731521606, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.1216269731521606, "logits_per_char": -0.5608134865760803, "num_chars": 2}, {"sum_logits": -1.540290117263794, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.540290117263794, "logits_per_char": -0.770145058631897, "num_chars": 2}, {"sum_logits": -1.6633027791976929, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.6633027791976929, "logits_per_char": -0.8316513895988464, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.089745044708252, "incorrect_loss_raw": 1.5797088940938313, "correct_loss_per_char": 0.544872522354126, "incorrect_loss_per_char": 0.7898544470469157, "correct_loss_per_token": 1.089745044708252, "incorrect_loss_per_token": 1.5797088940938313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.089745044708252, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": true, "logits_per_token": -1.089745044708252, "logits_per_char": -0.544872522354126, "num_chars": 2}, {"sum_logits": -1.1683063507080078, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.1683063507080078, "logits_per_char": -0.5841531753540039, "num_chars": 2}, {"sum_logits": -1.6524509191513062, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.6524509191513062, "logits_per_char": -0.8262254595756531, "num_chars": 2}, {"sum_logits": -1.9183694124221802, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.9183694124221802, "logits_per_char": -0.9591847062110901, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0689724683761597, "incorrect_loss_raw": 1.581134517987569, "correct_loss_per_char": 0.5344862341880798, "incorrect_loss_per_char": 0.7905672589937845, "correct_loss_per_token": 1.0689724683761597, "incorrect_loss_per_token": 1.581134517987569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.282533884048462, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.282533884048462, "logits_per_char": -0.641266942024231, "num_chars": 2}, {"sum_logits": -1.0689724683761597, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": true, "logits_per_token": -1.0689724683761597, "logits_per_char": -0.5344862341880798, "num_chars": 2}, {"sum_logits": -1.4761526584625244, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.4761526584625244, "logits_per_char": -0.7380763292312622, "num_chars": 2}, {"sum_logits": -1.9847170114517212, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.9847170114517212, "logits_per_char": -0.9923585057258606, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7157853841781616, "incorrect_loss_raw": 1.325936198234558, "correct_loss_per_char": 0.8578926920890808, "incorrect_loss_per_char": 0.662968099117279, "correct_loss_per_token": 1.7157853841781616, "incorrect_loss_per_token": 1.325936198234558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3464049100875854, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.3464049100875854, "logits_per_char": -0.6732024550437927, "num_chars": 2}, {"sum_logits": -1.173713207244873, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -1.173713207244873, "logits_per_char": -0.5868566036224365, "num_chars": 2}, {"sum_logits": -1.4576904773712158, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.4576904773712158, "logits_per_char": -0.7288452386856079, "num_chars": 2}, {"sum_logits": -1.7157853841781616, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.7157853841781616, "logits_per_char": -0.8578926920890808, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.580981969833374, "incorrect_loss_raw": 1.4036082824071248, "correct_loss_per_char": 0.790490984916687, "incorrect_loss_per_char": 0.7018041412035624, "correct_loss_per_token": 1.580981969833374, "incorrect_loss_per_token": 1.4036082824071248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2182649374008179, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.2182649374008179, "logits_per_char": -0.6091324687004089, "num_chars": 2}, {"sum_logits": -1.1295446157455444, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.1295446157455444, "logits_per_char": -0.5647723078727722, "num_chars": 2}, {"sum_logits": -1.580981969833374, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.580981969833374, "logits_per_char": -0.790490984916687, "num_chars": 2}, {"sum_logits": -1.8630152940750122, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.8630152940750122, "logits_per_char": -0.9315076470375061, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.586359977722168, "incorrect_loss_raw": 1.3841959635416667, "correct_loss_per_char": 0.793179988861084, "incorrect_loss_per_char": 0.6920979817708334, "correct_loss_per_token": 1.586359977722168, "incorrect_loss_per_token": 1.3841959635416667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1587620973587036, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": true, "logits_per_token": -1.1587620973587036, "logits_per_char": -0.5793810486793518, "num_chars": 2}, {"sum_logits": -1.2048401832580566, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.2048401832580566, "logits_per_char": -0.6024200916290283, "num_chars": 2}, {"sum_logits": -1.586359977722168, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.586359977722168, "logits_per_char": -0.793179988861084, "num_chars": 2}, {"sum_logits": -1.7889856100082397, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.7889856100082397, "logits_per_char": -0.8944928050041199, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.306803822517395, "incorrect_loss_raw": 1.47065003712972, "correct_loss_per_char": 0.6534019112586975, "incorrect_loss_per_char": 0.73532501856486, "correct_loss_per_token": 1.306803822517395, "incorrect_loss_per_token": 1.47065003712972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2539862394332886, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": true, "logits_per_token": -1.2539862394332886, "logits_per_char": -0.6269931197166443, "num_chars": 2}, {"sum_logits": -1.32587468624115, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.32587468624115, "logits_per_char": -0.662937343120575, "num_chars": 2}, {"sum_logits": -1.306803822517395, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.306803822517395, "logits_per_char": -0.6534019112586975, "num_chars": 2}, {"sum_logits": -1.8320891857147217, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.8320891857147217, "logits_per_char": -0.9160445928573608, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 100, "native_id": 100, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2395896911621094, "incorrect_loss_raw": 1.507582704226176, "correct_loss_per_char": 0.6197948455810547, "incorrect_loss_per_char": 0.753791352113088, "correct_loss_per_token": 1.2395896911621094, "incorrect_loss_per_token": 1.507582704226176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2395896911621094, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.2395896911621094, "logits_per_char": -0.6197948455810547, "num_chars": 2}, {"sum_logits": -1.1625216007232666, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.1625216007232666, "logits_per_char": -0.5812608003616333, "num_chars": 2}, {"sum_logits": -1.4732677936553955, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4732677936553955, "logits_per_char": -0.7366338968276978, "num_chars": 2}, {"sum_logits": -1.8869587182998657, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.8869587182998657, "logits_per_char": -0.9434793591499329, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 101, "native_id": 101, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8216029405593872, "incorrect_loss_raw": 1.3229586680730183, "correct_loss_per_char": 0.9108014702796936, "incorrect_loss_per_char": 0.6614793340365092, "correct_loss_per_token": 1.8216029405593872, "incorrect_loss_per_token": 1.3229586680730183, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.181541085243225, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.181541085243225, "logits_per_char": -0.5907705426216125, "num_chars": 2}, {"sum_logits": -1.087824821472168, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.087824821472168, "logits_per_char": -0.543912410736084, "num_chars": 2}, {"sum_logits": -1.699510097503662, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.699510097503662, "logits_per_char": -0.849755048751831, "num_chars": 2}, {"sum_logits": -1.8216029405593872, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.8216029405593872, "logits_per_char": -0.9108014702796936, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 102, "native_id": 102, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4720795154571533, "incorrect_loss_raw": 1.4528218905131023, "correct_loss_per_char": 0.7360397577285767, "incorrect_loss_per_char": 0.7264109452565511, "correct_loss_per_token": 1.4720795154571533, "incorrect_loss_per_token": 1.4528218905131023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0924878120422363, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -1.0924878120422363, "logits_per_char": -0.5462439060211182, "num_chars": 2}, {"sum_logits": -1.2608070373535156, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.2608070373535156, "logits_per_char": -0.6304035186767578, "num_chars": 2}, {"sum_logits": -1.4720795154571533, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.4720795154571533, "logits_per_char": -0.7360397577285767, "num_chars": 2}, {"sum_logits": -2.0051708221435547, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -2.0051708221435547, "logits_per_char": -1.0025854110717773, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 103, "native_id": 103, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.956148386001587, "incorrect_loss_raw": 1.3329137961069744, "correct_loss_per_char": 0.9780741930007935, "incorrect_loss_per_char": 0.6664568980534872, "correct_loss_per_token": 1.956148386001587, "incorrect_loss_per_token": 1.3329137961069744, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.089046597480774, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.089046597480774, "logits_per_char": -0.544523298740387, "num_chars": 2}, {"sum_logits": -1.1551049947738647, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.1551049947738647, "logits_per_char": -0.5775524973869324, "num_chars": 2}, {"sum_logits": -1.7545897960662842, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.7545897960662842, "logits_per_char": -0.8772948980331421, "num_chars": 2}, {"sum_logits": -1.956148386001587, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.956148386001587, "logits_per_char": -0.9780741930007935, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 104, "native_id": 104, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4896471500396729, "incorrect_loss_raw": 1.4533421595891316, "correct_loss_per_char": 0.7448235750198364, "incorrect_loss_per_char": 0.7266710797945658, "correct_loss_per_token": 1.4896471500396729, "incorrect_loss_per_token": 1.4533421595891316, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2027148008346558, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.2027148008346558, "logits_per_char": -0.6013574004173279, "num_chars": 2}, {"sum_logits": -1.1319725513458252, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": true, "logits_per_token": -1.1319725513458252, "logits_per_char": -0.5659862756729126, "num_chars": 2}, {"sum_logits": -1.4896471500396729, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.4896471500396729, "logits_per_char": -0.7448235750198364, "num_chars": 2}, {"sum_logits": -2.025339126586914, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -2.025339126586914, "logits_per_char": -1.012669563293457, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 105, "native_id": 105, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4096297025680542, "incorrect_loss_raw": 1.4369940757751465, "correct_loss_per_char": 0.7048148512840271, "incorrect_loss_per_char": 0.7184970378875732, "correct_loss_per_token": 1.4096297025680542, "incorrect_loss_per_token": 1.4369940757751465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4096297025680542, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.4096297025680542, "logits_per_char": -0.7048148512840271, "num_chars": 2}, {"sum_logits": -1.287006139755249, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.287006139755249, "logits_per_char": -0.6435030698776245, "num_chars": 2}, {"sum_logits": -1.1524379253387451, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": true, "logits_per_token": -1.1524379253387451, "logits_per_char": -0.5762189626693726, "num_chars": 2}, {"sum_logits": -1.8715381622314453, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.8715381622314453, "logits_per_char": -0.9357690811157227, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 106, "native_id": 106, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2988463640213013, "incorrect_loss_raw": 1.449622352917989, "correct_loss_per_char": 0.6494231820106506, "incorrect_loss_per_char": 0.7248111764589945, "correct_loss_per_token": 1.2988463640213013, "incorrect_loss_per_token": 1.449622352917989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3353266716003418, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.3353266716003418, "logits_per_char": -0.6676633358001709, "num_chars": 2}, {"sum_logits": -1.2988463640213013, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": true, "logits_per_token": -1.2988463640213013, "logits_per_char": -0.6494231820106506, "num_chars": 2}, {"sum_logits": -1.4107011556625366, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.4107011556625366, "logits_per_char": -0.7053505778312683, "num_chars": 2}, {"sum_logits": -1.6028392314910889, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.6028392314910889, "logits_per_char": -0.8014196157455444, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 107, "native_id": 107, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.48677396774292, "incorrect_loss_raw": 1.4055545727411907, "correct_loss_per_char": 0.74338698387146, "incorrect_loss_per_char": 0.7027772863705953, "correct_loss_per_token": 1.48677396774292, "incorrect_loss_per_token": 1.4055545727411907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2144267559051514, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.2144267559051514, "logits_per_char": -0.6072133779525757, "num_chars": 2}, {"sum_logits": -1.212263584136963, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": true, "logits_per_token": -1.212263584136963, "logits_per_char": -0.6061317920684814, "num_chars": 2}, {"sum_logits": -1.48677396774292, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.48677396774292, "logits_per_char": -0.74338698387146, "num_chars": 2}, {"sum_logits": -1.7899733781814575, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.7899733781814575, "logits_per_char": -0.8949866890907288, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 108, "native_id": 108, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7448906898498535, "incorrect_loss_raw": 1.3175156513849895, "correct_loss_per_char": 0.8724453449249268, "incorrect_loss_per_char": 0.6587578256924947, "correct_loss_per_token": 1.7448906898498535, "incorrect_loss_per_token": 1.3175156513849895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3655320405960083, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.3655320405960083, "logits_per_char": -0.6827660202980042, "num_chars": 2}, {"sum_logits": -1.1255261898040771, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.1255261898040771, "logits_per_char": -0.5627630949020386, "num_chars": 2}, {"sum_logits": -1.4614887237548828, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.4614887237548828, "logits_per_char": -0.7307443618774414, "num_chars": 2}, {"sum_logits": -1.7448906898498535, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.7448906898498535, "logits_per_char": -0.8724453449249268, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 109, "native_id": 109, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1356734037399292, "incorrect_loss_raw": 1.5263277689615886, "correct_loss_per_char": 0.5678367018699646, "incorrect_loss_per_char": 0.7631638844807943, "correct_loss_per_token": 1.1356734037399292, "incorrect_loss_per_token": 1.5263277689615886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3477054834365845, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.3477054834365845, "logits_per_char": -0.6738527417182922, "num_chars": 2}, {"sum_logits": -1.1356734037399292, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": true, "logits_per_token": -1.1356734037399292, "logits_per_char": -0.5678367018699646, "num_chars": 2}, {"sum_logits": -1.4455382823944092, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.4455382823944092, "logits_per_char": -0.7227691411972046, "num_chars": 2}, {"sum_logits": -1.785739541053772, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.785739541053772, "logits_per_char": -0.892869770526886, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 110, "native_id": 110, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1333941221237183, "incorrect_loss_raw": 1.551593542098999, "correct_loss_per_char": 0.5666970610618591, "incorrect_loss_per_char": 0.7757967710494995, "correct_loss_per_token": 1.1333941221237183, "incorrect_loss_per_token": 1.551593542098999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1851589679718018, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.1851589679718018, "logits_per_char": -0.5925794839859009, "num_chars": 2}, {"sum_logits": -1.1333941221237183, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": true, "logits_per_token": -1.1333941221237183, "logits_per_char": -0.5666970610618591, "num_chars": 2}, {"sum_logits": -1.5936108827590942, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.5936108827590942, "logits_per_char": -0.7968054413795471, "num_chars": 2}, {"sum_logits": -1.876010775566101, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.876010775566101, "logits_per_char": -0.9380053877830505, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 111, "native_id": 111, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9417420625686646, "incorrect_loss_raw": 1.2772694826126099, "correct_loss_per_char": 0.9708710312843323, "incorrect_loss_per_char": 0.6386347413063049, "correct_loss_per_token": 1.9417420625686646, "incorrect_loss_per_token": 1.2772694826126099, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2778486013412476, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.2778486013412476, "logits_per_char": -0.6389243006706238, "num_chars": 2}, {"sum_logits": -1.1799858808517456, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": true, "logits_per_token": -1.1799858808517456, "logits_per_char": -0.5899929404258728, "num_chars": 2}, {"sum_logits": -1.3739739656448364, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.3739739656448364, "logits_per_char": -0.6869869828224182, "num_chars": 2}, {"sum_logits": -1.9417420625686646, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.9417420625686646, "logits_per_char": -0.9708710312843323, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 112, "native_id": 112, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.702186942100525, "incorrect_loss_raw": 1.331616719563802, "correct_loss_per_char": 0.8510934710502625, "incorrect_loss_per_char": 0.665808359781901, "correct_loss_per_token": 1.702186942100525, "incorrect_loss_per_token": 1.331616719563802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3112229108810425, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.3112229108810425, "logits_per_char": -0.6556114554405212, "num_chars": 2}, {"sum_logits": -1.126761555671692, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.126761555671692, "logits_per_char": -0.563380777835846, "num_chars": 2}, {"sum_logits": -1.5568656921386719, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5568656921386719, "logits_per_char": -0.7784328460693359, "num_chars": 2}, {"sum_logits": -1.702186942100525, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.702186942100525, "logits_per_char": -0.8510934710502625, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 113, "native_id": 113, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4721765518188477, "incorrect_loss_raw": 1.394960641860962, "correct_loss_per_char": 0.7360882759094238, "incorrect_loss_per_char": 0.697480320930481, "correct_loss_per_token": 1.4721765518188477, "incorrect_loss_per_token": 1.394960641860962, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.456063985824585, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.456063985824585, "logits_per_char": -0.7280319929122925, "num_chars": 2}, {"sum_logits": -1.1608399152755737, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -1.1608399152755737, "logits_per_char": -0.5804199576377869, "num_chars": 2}, {"sum_logits": -1.4721765518188477, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.4721765518188477, "logits_per_char": -0.7360882759094238, "num_chars": 2}, {"sum_logits": -1.567978024482727, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.567978024482727, "logits_per_char": -0.7839890122413635, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 114, "native_id": 114, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.474457025527954, "incorrect_loss_raw": 1.4059821367263794, "correct_loss_per_char": 0.737228512763977, "incorrect_loss_per_char": 0.7029910683631897, "correct_loss_per_token": 1.474457025527954, "incorrect_loss_per_token": 1.4059821367263794, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.474457025527954, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.474457025527954, "logits_per_char": -0.737228512763977, "num_chars": 2}, {"sum_logits": -1.0882905721664429, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": true, "logits_per_token": -1.0882905721664429, "logits_per_char": -0.5441452860832214, "num_chars": 2}, {"sum_logits": -1.416721224784851, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.416721224784851, "logits_per_char": -0.7083606123924255, "num_chars": 2}, {"sum_logits": -1.7129346132278442, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.7129346132278442, "logits_per_char": -0.8564673066139221, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 115, "native_id": 115, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9467875957489014, "incorrect_loss_raw": 1.2727619806925456, "correct_loss_per_char": 0.9733937978744507, "incorrect_loss_per_char": 0.6363809903462728, "correct_loss_per_token": 1.9467875957489014, "incorrect_loss_per_token": 1.2727619806925456, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3286688327789307, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.3286688327789307, "logits_per_char": -0.6643344163894653, "num_chars": 2}, {"sum_logits": -1.196504831314087, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": true, "logits_per_token": -1.196504831314087, "logits_per_char": -0.5982524156570435, "num_chars": 2}, {"sum_logits": -1.2931122779846191, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.2931122779846191, "logits_per_char": -0.6465561389923096, "num_chars": 2}, {"sum_logits": -1.9467875957489014, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.9467875957489014, "logits_per_char": -0.9733937978744507, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 116, "native_id": 116, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8217064142227173, "incorrect_loss_raw": 1.3154455820719402, "correct_loss_per_char": 0.9108532071113586, "incorrect_loss_per_char": 0.6577227910359701, "correct_loss_per_token": 1.8217064142227173, "incorrect_loss_per_token": 1.3154455820719402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5392237901687622, "num_tokens": 1, "num_tokens_all": 709, "is_greedy": false, "logits_per_token": -1.5392237901687622, "logits_per_char": -0.7696118950843811, "num_chars": 2}, {"sum_logits": -1.0208507776260376, "num_tokens": 1, "num_tokens_all": 709, "is_greedy": true, "logits_per_token": -1.0208507776260376, "logits_per_char": -0.5104253888130188, "num_chars": 2}, {"sum_logits": -1.3862621784210205, "num_tokens": 1, "num_tokens_all": 709, "is_greedy": false, "logits_per_token": -1.3862621784210205, "logits_per_char": -0.6931310892105103, "num_chars": 2}, {"sum_logits": -1.8217064142227173, "num_tokens": 1, "num_tokens_all": 709, "is_greedy": false, "logits_per_token": -1.8217064142227173, "logits_per_char": -0.9108532071113586, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 117, "native_id": 117, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4829896688461304, "incorrect_loss_raw": 1.430287758509318, "correct_loss_per_char": 0.7414948344230652, "incorrect_loss_per_char": 0.715143879254659, "correct_loss_per_token": 1.4829896688461304, "incorrect_loss_per_token": 1.430287758509318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3097540140151978, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.3097540140151978, "logits_per_char": -0.6548770070075989, "num_chars": 2}, {"sum_logits": -1.078276515007019, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": true, "logits_per_token": -1.078276515007019, "logits_per_char": -0.5391382575035095, "num_chars": 2}, {"sum_logits": -1.4829896688461304, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.4829896688461304, "logits_per_char": -0.7414948344230652, "num_chars": 2}, {"sum_logits": -1.9028327465057373, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.9028327465057373, "logits_per_char": -0.9514163732528687, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 118, "native_id": 118, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4800914525985718, "incorrect_loss_raw": 1.3978741963704426, "correct_loss_per_char": 0.7400457262992859, "incorrect_loss_per_char": 0.6989370981852213, "correct_loss_per_token": 1.4800914525985718, "incorrect_loss_per_token": 1.3978741963704426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4800914525985718, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.4800914525985718, "logits_per_char": -0.7400457262992859, "num_chars": 2}, {"sum_logits": -1.1408641338348389, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": true, "logits_per_token": -1.1408641338348389, "logits_per_char": -0.5704320669174194, "num_chars": 2}, {"sum_logits": -1.4530645608901978, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.4530645608901978, "logits_per_char": -0.7265322804450989, "num_chars": 2}, {"sum_logits": -1.5996938943862915, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.5996938943862915, "logits_per_char": -0.7998469471931458, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 119, "native_id": 119, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8676667213439941, "incorrect_loss_raw": 1.2980836232503254, "correct_loss_per_char": 0.9338333606719971, "incorrect_loss_per_char": 0.6490418116251627, "correct_loss_per_token": 1.8676667213439941, "incorrect_loss_per_token": 1.2980836232503254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1661196947097778, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": true, "logits_per_token": -1.1661196947097778, "logits_per_char": -0.5830598473548889, "num_chars": 2}, {"sum_logits": -1.2109049558639526, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.2109049558639526, "logits_per_char": -0.6054524779319763, "num_chars": 2}, {"sum_logits": -1.517226219177246, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.517226219177246, "logits_per_char": -0.758613109588623, "num_chars": 2}, {"sum_logits": -1.8676667213439941, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.8676667213439941, "logits_per_char": -0.9338333606719971, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 120, "native_id": 120, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.132413387298584, "incorrect_loss_raw": 1.5375994443893433, "correct_loss_per_char": 0.566206693649292, "incorrect_loss_per_char": 0.7687997221946716, "correct_loss_per_token": 1.132413387298584, "incorrect_loss_per_token": 1.5375994443893433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2758351564407349, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.2758351564407349, "logits_per_char": -0.6379175782203674, "num_chars": 2}, {"sum_logits": -1.132413387298584, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": true, "logits_per_token": -1.132413387298584, "logits_per_char": -0.566206693649292, "num_chars": 2}, {"sum_logits": -1.4510337114334106, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.4510337114334106, "logits_per_char": -0.7255168557167053, "num_chars": 2}, {"sum_logits": -1.8859294652938843, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.8859294652938843, "logits_per_char": -0.9429647326469421, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 121, "native_id": 121, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2124289274215698, "incorrect_loss_raw": 1.538524587949117, "correct_loss_per_char": 0.6062144637107849, "incorrect_loss_per_char": 0.7692622939745585, "correct_loss_per_token": 1.2124289274215698, "incorrect_loss_per_token": 1.538524587949117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.150587558746338, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": true, "logits_per_token": -1.150587558746338, "logits_per_char": -0.575293779373169, "num_chars": 2}, {"sum_logits": -1.2124289274215698, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.2124289274215698, "logits_per_char": -0.6062144637107849, "num_chars": 2}, {"sum_logits": -1.5103974342346191, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.5103974342346191, "logits_per_char": -0.7551987171173096, "num_chars": 2}, {"sum_logits": -1.954588770866394, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.954588770866394, "logits_per_char": -0.977294385433197, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 122, "native_id": 122, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3015642166137695, "incorrect_loss_raw": 1.4871011177698772, "correct_loss_per_char": 0.6507821083068848, "incorrect_loss_per_char": 0.7435505588849386, "correct_loss_per_token": 1.3015642166137695, "incorrect_loss_per_token": 1.4871011177698772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3533449172973633, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.3533449172973633, "logits_per_char": -0.6766724586486816, "num_chars": 2}, {"sum_logits": -1.1691149473190308, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": true, "logits_per_token": -1.1691149473190308, "logits_per_char": -0.5845574736595154, "num_chars": 2}, {"sum_logits": -1.3015642166137695, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.3015642166137695, "logits_per_char": -0.6507821083068848, "num_chars": 2}, {"sum_logits": -1.9388434886932373, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.9388434886932373, "logits_per_char": -0.9694217443466187, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 123, "native_id": 123, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8140443563461304, "incorrect_loss_raw": 1.3001749912897747, "correct_loss_per_char": 0.9070221781730652, "incorrect_loss_per_char": 0.6500874956448873, "correct_loss_per_token": 1.8140443563461304, "incorrect_loss_per_token": 1.3001749912897747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.285239577293396, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.285239577293396, "logits_per_char": -0.642619788646698, "num_chars": 2}, {"sum_logits": -1.1438813209533691, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.1438813209533691, "logits_per_char": -0.5719406604766846, "num_chars": 2}, {"sum_logits": -1.4714040756225586, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.4714040756225586, "logits_per_char": -0.7357020378112793, "num_chars": 2}, {"sum_logits": -1.8140443563461304, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.8140443563461304, "logits_per_char": -0.9070221781730652, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 124, "native_id": 124, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.764594316482544, "incorrect_loss_raw": 1.318774938583374, "correct_loss_per_char": 0.882297158241272, "incorrect_loss_per_char": 0.659387469291687, "correct_loss_per_token": 1.764594316482544, "incorrect_loss_per_token": 1.318774938583374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3741039037704468, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.3741039037704468, "logits_per_char": -0.6870519518852234, "num_chars": 2}, {"sum_logits": -1.0689301490783691, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": true, "logits_per_token": -1.0689301490783691, "logits_per_char": -0.5344650745391846, "num_chars": 2}, {"sum_logits": -1.5132907629013062, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.5132907629013062, "logits_per_char": -0.7566453814506531, "num_chars": 2}, {"sum_logits": -1.764594316482544, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.764594316482544, "logits_per_char": -0.882297158241272, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 125, "native_id": 125, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8373148441314697, "incorrect_loss_raw": 1.2978339592615764, "correct_loss_per_char": 0.9186574220657349, "incorrect_loss_per_char": 0.6489169796307882, "correct_loss_per_token": 1.8373148441314697, "incorrect_loss_per_token": 1.2978339592615764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.421861171722412, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.421861171722412, "logits_per_char": -0.710930585861206, "num_chars": 2}, {"sum_logits": -1.156911015510559, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.156911015510559, "logits_per_char": -0.5784555077552795, "num_chars": 2}, {"sum_logits": -1.3147296905517578, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.3147296905517578, "logits_per_char": -0.6573648452758789, "num_chars": 2}, {"sum_logits": -1.8373148441314697, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.8373148441314697, "logits_per_char": -0.9186574220657349, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 126, "native_id": 126, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0134174823760986, "incorrect_loss_raw": 1.288114031155904, "correct_loss_per_char": 1.0067087411880493, "incorrect_loss_per_char": 0.644057015577952, "correct_loss_per_token": 2.0134174823760986, "incorrect_loss_per_token": 1.288114031155904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3168030977249146, "num_tokens": 1, "num_tokens_all": 641, "is_greedy": false, "logits_per_token": -1.3168030977249146, "logits_per_char": -0.6584015488624573, "num_chars": 2}, {"sum_logits": -1.0399348735809326, "num_tokens": 1, "num_tokens_all": 641, "is_greedy": true, "logits_per_token": -1.0399348735809326, "logits_per_char": -0.5199674367904663, "num_chars": 2}, {"sum_logits": -1.5076041221618652, "num_tokens": 1, "num_tokens_all": 641, "is_greedy": false, "logits_per_token": -1.5076041221618652, "logits_per_char": -0.7538020610809326, "num_chars": 2}, {"sum_logits": -2.0134174823760986, "num_tokens": 1, "num_tokens_all": 641, "is_greedy": false, "logits_per_token": -2.0134174823760986, "logits_per_char": -1.0067087411880493, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 127, "native_id": 127, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5141512155532837, "incorrect_loss_raw": 1.4312375386555989, "correct_loss_per_char": 0.7570756077766418, "incorrect_loss_per_char": 0.7156187693277994, "correct_loss_per_token": 1.5141512155532837, "incorrect_loss_per_token": 1.4312375386555989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.137900471687317, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": true, "logits_per_token": -1.137900471687317, "logits_per_char": -0.5689502358436584, "num_chars": 2}, {"sum_logits": -1.191635251045227, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.191635251045227, "logits_per_char": -0.5958176255226135, "num_chars": 2}, {"sum_logits": -1.5141512155532837, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.5141512155532837, "logits_per_char": -0.7570756077766418, "num_chars": 2}, {"sum_logits": -1.964176893234253, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.964176893234253, "logits_per_char": -0.9820884466171265, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 128, "native_id": 128, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6217695474624634, "incorrect_loss_raw": 1.3457333246866863, "correct_loss_per_char": 0.8108847737312317, "incorrect_loss_per_char": 0.6728666623433431, "correct_loss_per_token": 1.6217695474624634, "incorrect_loss_per_token": 1.3457333246866863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.481343150138855, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.481343150138855, "logits_per_char": -0.7406715750694275, "num_chars": 2}, {"sum_logits": -1.1557832956314087, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": true, "logits_per_token": -1.1557832956314087, "logits_per_char": -0.5778916478157043, "num_chars": 2}, {"sum_logits": -1.400073528289795, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.400073528289795, "logits_per_char": -0.7000367641448975, "num_chars": 2}, {"sum_logits": -1.6217695474624634, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.6217695474624634, "logits_per_char": -0.8108847737312317, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 129, "native_id": 129, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4143000841140747, "incorrect_loss_raw": 1.422859827677409, "correct_loss_per_char": 0.7071500420570374, "incorrect_loss_per_char": 0.7114299138387045, "correct_loss_per_token": 1.4143000841140747, "incorrect_loss_per_token": 1.422859827677409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2713121175765991, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.2713121175765991, "logits_per_char": -0.6356560587882996, "num_chars": 2}, {"sum_logits": -1.2646764516830444, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": true, "logits_per_token": -1.2646764516830444, "logits_per_char": -0.6323382258415222, "num_chars": 2}, {"sum_logits": -1.4143000841140747, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.4143000841140747, "logits_per_char": -0.7071500420570374, "num_chars": 2}, {"sum_logits": -1.732590913772583, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.732590913772583, "logits_per_char": -0.8662954568862915, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 130, "native_id": 130, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4025005102157593, "incorrect_loss_raw": 1.436898112297058, "correct_loss_per_char": 0.7012502551078796, "incorrect_loss_per_char": 0.718449056148529, "correct_loss_per_token": 1.4025005102157593, "incorrect_loss_per_token": 1.436898112297058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4025005102157593, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.4025005102157593, "logits_per_char": -0.7012502551078796, "num_chars": 2}, {"sum_logits": -1.126711368560791, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": true, "logits_per_token": -1.126711368560791, "logits_per_char": -0.5633556842803955, "num_chars": 2}, {"sum_logits": -1.3513076305389404, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.3513076305389404, "logits_per_char": -0.6756538152694702, "num_chars": 2}, {"sum_logits": -1.8326753377914429, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.8326753377914429, "logits_per_char": -0.9163376688957214, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 131, "native_id": 131, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0238773822784424, "incorrect_loss_raw": 1.5900928974151611, "correct_loss_per_char": 0.5119386911392212, "incorrect_loss_per_char": 0.7950464487075806, "correct_loss_per_token": 1.0238773822784424, "incorrect_loss_per_token": 1.5900928974151611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5044808387756348, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.5044808387756348, "logits_per_char": -0.7522404193878174, "num_chars": 2}, {"sum_logits": -1.0238773822784424, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -1.0238773822784424, "logits_per_char": -0.5119386911392212, "num_chars": 2}, {"sum_logits": -1.472522497177124, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.472522497177124, "logits_per_char": -0.736261248588562, "num_chars": 2}, {"sum_logits": -1.7932753562927246, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.7932753562927246, "logits_per_char": -0.8966376781463623, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 132, "native_id": 132, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4145097732543945, "incorrect_loss_raw": 1.44353981812795, "correct_loss_per_char": 0.7072548866271973, "incorrect_loss_per_char": 0.721769909063975, "correct_loss_per_token": 1.4145097732543945, "incorrect_loss_per_token": 1.44353981812795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.351618766784668, "num_tokens": 1, "num_tokens_all": 636, "is_greedy": false, "logits_per_token": -1.351618766784668, "logits_per_char": -0.675809383392334, "num_chars": 2}, {"sum_logits": -1.0956220626831055, "num_tokens": 1, "num_tokens_all": 636, "is_greedy": true, "logits_per_token": -1.0956220626831055, "logits_per_char": -0.5478110313415527, "num_chars": 2}, {"sum_logits": -1.4145097732543945, "num_tokens": 1, "num_tokens_all": 636, "is_greedy": false, "logits_per_token": -1.4145097732543945, "logits_per_char": -0.7072548866271973, "num_chars": 2}, {"sum_logits": -1.8833786249160767, "num_tokens": 1, "num_tokens_all": 636, "is_greedy": false, "logits_per_token": -1.8833786249160767, "logits_per_char": -0.9416893124580383, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 133, "native_id": 133, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1358827352523804, "incorrect_loss_raw": 1.5210017363230388, "correct_loss_per_char": 0.5679413676261902, "incorrect_loss_per_char": 0.7605008681615194, "correct_loss_per_token": 1.1358827352523804, "incorrect_loss_per_token": 1.5210017363230388, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5619890689849854, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.5619890689849854, "logits_per_char": -0.7809945344924927, "num_chars": 2}, {"sum_logits": -1.1358827352523804, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.1358827352523804, "logits_per_char": -0.5679413676261902, "num_chars": 2}, {"sum_logits": -1.2981295585632324, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.2981295585632324, "logits_per_char": -0.6490647792816162, "num_chars": 2}, {"sum_logits": -1.7028865814208984, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.7028865814208984, "logits_per_char": -0.8514432907104492, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 134, "native_id": 134, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6513392925262451, "incorrect_loss_raw": 1.3429809014002483, "correct_loss_per_char": 0.8256696462631226, "incorrect_loss_per_char": 0.6714904507001241, "correct_loss_per_token": 1.6513392925262451, "incorrect_loss_per_token": 1.3429809014002483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.405637502670288, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.405637502670288, "logits_per_char": -0.702818751335144, "num_chars": 2}, {"sum_logits": -1.2558563947677612, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": true, "logits_per_token": -1.2558563947677612, "logits_per_char": -0.6279281973838806, "num_chars": 2}, {"sum_logits": -1.3674488067626953, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.3674488067626953, "logits_per_char": -0.6837244033813477, "num_chars": 2}, {"sum_logits": -1.6513392925262451, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.6513392925262451, "logits_per_char": -0.8256696462631226, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 135, "native_id": 135, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.725052833557129, "incorrect_loss_raw": 1.3464411497116089, "correct_loss_per_char": 0.8625264167785645, "incorrect_loss_per_char": 0.6732205748558044, "correct_loss_per_token": 1.725052833557129, "incorrect_loss_per_token": 1.3464411497116089, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1590808629989624, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.1590808629989624, "logits_per_char": -0.5795404314994812, "num_chars": 2}, {"sum_logits": -1.2205216884613037, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.2205216884613037, "logits_per_char": -0.6102608442306519, "num_chars": 2}, {"sum_logits": -1.6597208976745605, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.6597208976745605, "logits_per_char": -0.8298604488372803, "num_chars": 2}, {"sum_logits": -1.725052833557129, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.725052833557129, "logits_per_char": -0.8625264167785645, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 136, "native_id": 136, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3173216581344604, "incorrect_loss_raw": 1.4778294960657756, "correct_loss_per_char": 0.6586608290672302, "incorrect_loss_per_char": 0.7389147480328878, "correct_loss_per_token": 1.3173216581344604, "incorrect_loss_per_token": 1.4778294960657756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.25177800655365, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": true, "logits_per_token": -1.25177800655365, "logits_per_char": -0.625889003276825, "num_chars": 2}, {"sum_logits": -1.261156678199768, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.261156678199768, "logits_per_char": -0.630578339099884, "num_chars": 2}, {"sum_logits": -1.3173216581344604, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.3173216581344604, "logits_per_char": -0.6586608290672302, "num_chars": 2}, {"sum_logits": -1.9205538034439087, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.9205538034439087, "logits_per_char": -0.9602769017219543, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 137, "native_id": 137, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8533076047897339, "incorrect_loss_raw": 1.2904389301935832, "correct_loss_per_char": 0.9266538023948669, "incorrect_loss_per_char": 0.6452194650967916, "correct_loss_per_token": 1.8533076047897339, "incorrect_loss_per_token": 1.2904389301935832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3085614442825317, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.3085614442825317, "logits_per_char": -0.6542807221412659, "num_chars": 2}, {"sum_logits": -1.1652897596359253, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.1652897596359253, "logits_per_char": -0.5826448798179626, "num_chars": 2}, {"sum_logits": -1.3974655866622925, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.3974655866622925, "logits_per_char": -0.6987327933311462, "num_chars": 2}, {"sum_logits": -1.8533076047897339, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.8533076047897339, "logits_per_char": -0.9266538023948669, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 138, "native_id": 138, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.106398105621338, "incorrect_loss_raw": 1.2528328895568848, "correct_loss_per_char": 1.053199052810669, "incorrect_loss_per_char": 0.6264164447784424, "correct_loss_per_token": 2.106398105621338, "incorrect_loss_per_token": 1.2528328895568848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.290447473526001, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.290447473526001, "logits_per_char": -0.6452237367630005, "num_chars": 2}, {"sum_logits": -1.1994872093200684, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.1994872093200684, "logits_per_char": -0.5997436046600342, "num_chars": 2}, {"sum_logits": -1.268563985824585, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.268563985824585, "logits_per_char": -0.6342819929122925, "num_chars": 2}, {"sum_logits": -2.106398105621338, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -2.106398105621338, "logits_per_char": -1.053199052810669, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 139, "native_id": 139, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2573652267456055, "incorrect_loss_raw": 1.487789273262024, "correct_loss_per_char": 0.6286826133728027, "incorrect_loss_per_char": 0.743894636631012, "correct_loss_per_token": 1.2573652267456055, "incorrect_loss_per_token": 1.487789273262024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7174272537231445, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.7174272537231445, "logits_per_char": -0.8587136268615723, "num_chars": 2}, {"sum_logits": -1.1842752695083618, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.1842752695083618, "logits_per_char": -0.5921376347541809, "num_chars": 2}, {"sum_logits": -1.2573652267456055, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.2573652267456055, "logits_per_char": -0.6286826133728027, "num_chars": 2}, {"sum_logits": -1.5616652965545654, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.5616652965545654, "logits_per_char": -0.7808326482772827, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 140, "native_id": 140, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.368219256401062, "incorrect_loss_raw": 1.4806067546208699, "correct_loss_per_char": 0.684109628200531, "incorrect_loss_per_char": 0.7403033773104349, "correct_loss_per_token": 1.368219256401062, "incorrect_loss_per_token": 1.4806067546208699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368219256401062, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.368219256401062, "logits_per_char": -0.684109628200531, "num_chars": 2}, {"sum_logits": -1.0156807899475098, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": true, "logits_per_token": -1.0156807899475098, "logits_per_char": -0.5078403949737549, "num_chars": 2}, {"sum_logits": -1.4891228675842285, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.4891228675842285, "logits_per_char": -0.7445614337921143, "num_chars": 2}, {"sum_logits": -1.9370166063308716, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.9370166063308716, "logits_per_char": -0.9685083031654358, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 141, "native_id": 141, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3789334297180176, "incorrect_loss_raw": 1.4364811579386394, "correct_loss_per_char": 0.6894667148590088, "incorrect_loss_per_char": 0.7182405789693197, "correct_loss_per_token": 1.3789334297180176, "incorrect_loss_per_token": 1.4364811579386394, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.398112177848816, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.398112177848816, "logits_per_char": -0.699056088924408, "num_chars": 2}, {"sum_logits": -1.1371389627456665, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": true, "logits_per_token": -1.1371389627456665, "logits_per_char": -0.5685694813728333, "num_chars": 2}, {"sum_logits": -1.3789334297180176, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.3789334297180176, "logits_per_char": -0.6894667148590088, "num_chars": 2}, {"sum_logits": -1.7741923332214355, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.7741923332214355, "logits_per_char": -0.8870961666107178, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 142, "native_id": 142, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4068105220794678, "incorrect_loss_raw": 1.4226607084274292, "correct_loss_per_char": 0.7034052610397339, "incorrect_loss_per_char": 0.7113303542137146, "correct_loss_per_token": 1.4068105220794678, "incorrect_loss_per_token": 1.4226607084274292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4480911493301392, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.4480911493301392, "logits_per_char": -0.7240455746650696, "num_chars": 2}, {"sum_logits": -1.142393946647644, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": true, "logits_per_token": -1.142393946647644, "logits_per_char": -0.571196973323822, "num_chars": 2}, {"sum_logits": -1.4068105220794678, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.4068105220794678, "logits_per_char": -0.7034052610397339, "num_chars": 2}, {"sum_logits": -1.6774970293045044, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.6774970293045044, "logits_per_char": -0.8387485146522522, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 143, "native_id": 143, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3052821159362793, "incorrect_loss_raw": 1.4567288557688396, "correct_loss_per_char": 0.6526410579681396, "incorrect_loss_per_char": 0.7283644278844198, "correct_loss_per_token": 1.3052821159362793, "incorrect_loss_per_token": 1.4567288557688396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3052821159362793, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.3052821159362793, "logits_per_char": -0.6526410579681396, "num_chars": 2}, {"sum_logits": -1.1761094331741333, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.1761094331741333, "logits_per_char": -0.5880547165870667, "num_chars": 2}, {"sum_logits": -1.5554466247558594, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.5554466247558594, "logits_per_char": -0.7777233123779297, "num_chars": 2}, {"sum_logits": -1.6386305093765259, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.6386305093765259, "logits_per_char": -0.8193152546882629, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 144, "native_id": 144, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1298531293869019, "incorrect_loss_raw": 1.527436335881551, "correct_loss_per_char": 0.5649265646934509, "incorrect_loss_per_char": 0.7637181679407755, "correct_loss_per_token": 1.1298531293869019, "incorrect_loss_per_token": 1.527436335881551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.345976710319519, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.345976710319519, "logits_per_char": -0.6729883551597595, "num_chars": 2}, {"sum_logits": -1.1298531293869019, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": true, "logits_per_token": -1.1298531293869019, "logits_per_char": -0.5649265646934509, "num_chars": 2}, {"sum_logits": -1.4330576658248901, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.4330576658248901, "logits_per_char": -0.7165288329124451, "num_chars": 2}, {"sum_logits": -1.8032746315002441, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.8032746315002441, "logits_per_char": -0.9016373157501221, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 145, "native_id": 145, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7654517889022827, "incorrect_loss_raw": 1.312799096107483, "correct_loss_per_char": 0.8827258944511414, "incorrect_loss_per_char": 0.6563995480537415, "correct_loss_per_token": 1.7654517889022827, "incorrect_loss_per_token": 1.312799096107483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2675374746322632, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.2675374746322632, "logits_per_char": -0.6337687373161316, "num_chars": 2}, {"sum_logits": -1.1864213943481445, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.1864213943481445, "logits_per_char": -0.5932106971740723, "num_chars": 2}, {"sum_logits": -1.484438419342041, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.484438419342041, "logits_per_char": -0.7422192096710205, "num_chars": 2}, {"sum_logits": -1.7654517889022827, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.7654517889022827, "logits_per_char": -0.8827258944511414, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 146, "native_id": 146, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.187377691268921, "incorrect_loss_raw": 1.4995768070220947, "correct_loss_per_char": 0.5936888456344604, "incorrect_loss_per_char": 0.7497884035110474, "correct_loss_per_token": 1.187377691268921, "incorrect_loss_per_token": 1.4995768070220947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4616672992706299, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.4616672992706299, "logits_per_char": -0.7308336496353149, "num_chars": 2}, {"sum_logits": -1.187377691268921, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": true, "logits_per_token": -1.187377691268921, "logits_per_char": -0.5936888456344604, "num_chars": 2}, {"sum_logits": -1.466328740119934, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.466328740119934, "logits_per_char": -0.733164370059967, "num_chars": 2}, {"sum_logits": -1.5707343816757202, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.5707343816757202, "logits_per_char": -0.7853671908378601, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 147, "native_id": 147, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1955853700637817, "incorrect_loss_raw": 1.52656086285909, "correct_loss_per_char": 0.5977926850318909, "incorrect_loss_per_char": 0.763280431429545, "correct_loss_per_token": 1.1955853700637817, "incorrect_loss_per_token": 1.52656086285909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1955853700637817, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.1955853700637817, "logits_per_char": -0.5977926850318909, "num_chars": 2}, {"sum_logits": -1.2598565816879272, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.2598565816879272, "logits_per_char": -0.6299282908439636, "num_chars": 2}, {"sum_logits": -1.4051090478897095, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.4051090478897095, "logits_per_char": -0.7025545239448547, "num_chars": 2}, {"sum_logits": -1.9147169589996338, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.9147169589996338, "logits_per_char": -0.9573584794998169, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 148, "native_id": 148, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9140679836273193, "incorrect_loss_raw": 1.3000677824020386, "correct_loss_per_char": 0.9570339918136597, "incorrect_loss_per_char": 0.6500338912010193, "correct_loss_per_token": 1.9140679836273193, "incorrect_loss_per_token": 1.3000677824020386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2420457601547241, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": true, "logits_per_token": -1.2420457601547241, "logits_per_char": -0.6210228800773621, "num_chars": 2}, {"sum_logits": -1.2547450065612793, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.2547450065612793, "logits_per_char": -0.6273725032806396, "num_chars": 2}, {"sum_logits": -1.4034125804901123, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.4034125804901123, "logits_per_char": -0.7017062902450562, "num_chars": 2}, {"sum_logits": -1.9140679836273193, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.9140679836273193, "logits_per_char": -0.9570339918136597, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 149, "native_id": 149, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2777800559997559, "incorrect_loss_raw": 1.4755776325861614, "correct_loss_per_char": 0.6388900279998779, "incorrect_loss_per_char": 0.7377888162930807, "correct_loss_per_token": 1.2777800559997559, "incorrect_loss_per_token": 1.4755776325861614, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2777800559997559, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.2777800559997559, "logits_per_char": -0.6388900279998779, "num_chars": 2}, {"sum_logits": -1.1884486675262451, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": true, "logits_per_token": -1.1884486675262451, "logits_per_char": -0.5942243337631226, "num_chars": 2}, {"sum_logits": -1.444412112236023, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.444412112236023, "logits_per_char": -0.7222060561180115, "num_chars": 2}, {"sum_logits": -1.7938721179962158, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.7938721179962158, "logits_per_char": -0.8969360589981079, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 150, "native_id": 150, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410185694694519, "incorrect_loss_raw": 1.4254753192265828, "correct_loss_per_char": 0.7050928473472595, "incorrect_loss_per_char": 0.7127376596132914, "correct_loss_per_token": 1.410185694694519, "incorrect_loss_per_token": 1.4254753192265828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4011279344558716, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.4011279344558716, "logits_per_char": -0.7005639672279358, "num_chars": 2}, {"sum_logits": -1.1515055894851685, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -1.1515055894851685, "logits_per_char": -0.5757527947425842, "num_chars": 2}, {"sum_logits": -1.410185694694519, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.410185694694519, "logits_per_char": -0.7050928473472595, "num_chars": 2}, {"sum_logits": -1.7237924337387085, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.7237924337387085, "logits_per_char": -0.8618962168693542, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
{"doc_id": 151, "native_id": 151, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.183058738708496, "incorrect_loss_raw": 1.53738800684611, "correct_loss_per_char": 0.591529369354248, "incorrect_loss_per_char": 0.768694003423055, "correct_loss_per_token": 1.183058738708496, "incorrect_loss_per_token": 1.53738800684611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1810146570205688, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.1810146570205688, "logits_per_char": -0.5905073285102844, "num_chars": 2}, {"sum_logits": -1.183058738708496, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.183058738708496, "logits_per_char": -0.591529369354248, "num_chars": 2}, {"sum_logits": -1.5057308673858643, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.5057308673858643, "logits_per_char": -0.7528654336929321, "num_chars": 2}, {"sum_logits": -1.925418496131897, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.925418496131897, "logits_per_char": -0.9627092480659485, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "ed4f59953ec0e27753b699d099c8e94b"} |
|
|