omarelshehy commited on
Commit
dc83cb1
·
verified ·
1 Parent(s): 79ba36d

Upload folder using huggingface_hub

Browse files
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "aubmindlab/bert-base-arabertv02",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.45.2",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 64002
25
+ }
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<passage>": 64001,
3
+ "<query>": 64000
4
+ }
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "aubmindlab/bert-base-arabertv02",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.45.2",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 64002
25
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.2.1",
4
+ "transformers": "4.45.2",
5
+ "pytorch": "2.1.0+cu118"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7287986689fab8c867a0b99682022c848ae2a015c75194560b119618f96214cf
3
+ size 540801896
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1b31f4ac606cfee5c554f2ed0675f4fbe1b69210f580a41bf81b5c90e67b879
3
+ size 1076999098
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a30a127e91bea625ce6a21f7f0c95ca900b9ea5e124097be2534876d0d0acb
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc5ddfdac7e5a8be097378232363eb8a98fba4780c49a2b78b57dec384e48bbb
3
+ size 1064
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[رابط]",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": true,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "[بريد]",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": true,
57
+ "special": true
58
+ },
59
+ "7": {
60
+ "content": "[مستخدم]",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": true,
65
+ "special": true
66
+ },
67
+ "64000": {
68
+ "content": "<query>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "64001": {
76
+ "content": "<passage>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ }
83
+ },
84
+ "clean_up_tokenization_spaces": false,
85
+ "cls_token": "[CLS]",
86
+ "do_basic_tokenize": true,
87
+ "do_lower_case": false,
88
+ "mask_token": "[MASK]",
89
+ "max_len": 512,
90
+ "model_max_length": 512,
91
+ "never_split": [
92
+ "[بريد]",
93
+ "[مستخدم]",
94
+ "[رابط]"
95
+ ],
96
+ "pad_token": "[PAD]",
97
+ "sep_token": "[SEP]",
98
+ "strip_accents": null,
99
+ "tokenize_chinese_chars": true,
100
+ "tokenizer_class": "BertTokenizer",
101
+ "unk_token": "[UNK]"
102
+ }
trainer_state.json ADDED
@@ -0,0 +1,3105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8163072349117797,
3
+ "best_model_checkpoint": "output/arabert-2stage/checkpoint-4000",
4
+ "epoch": 0.6798096532970768,
5
+ "eval_steps": 500,
6
+ "global_step": 4000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.001699524133242692,
13
+ "grad_norm": 18.25199317932129,
14
+ "learning_rate": 1.3582342954159594e-07,
15
+ "loss": 4.8813,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.003399048266485384,
20
+ "grad_norm": 22.529903411865234,
21
+ "learning_rate": 3.0560271646859084e-07,
22
+ "loss": 4.555,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.005098572399728076,
27
+ "grad_norm": 16.15692901611328,
28
+ "learning_rate": 4.7538200339558575e-07,
29
+ "loss": 4.5007,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.006798096532970768,
34
+ "grad_norm": 15.619988441467285,
35
+ "learning_rate": 6.451612903225807e-07,
36
+ "loss": 3.9173,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.00849762066621346,
41
+ "grad_norm": 16.881023406982422,
42
+ "learning_rate": 8.149405772495757e-07,
43
+ "loss": 4.3681,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.010197144799456152,
48
+ "grad_norm": 16.038394927978516,
49
+ "learning_rate": 9.67741935483871e-07,
50
+ "loss": 4.2357,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.011896668932698844,
55
+ "grad_norm": 12.993019104003906,
56
+ "learning_rate": 1.137521222410866e-06,
57
+ "loss": 4.1423,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.013596193065941536,
62
+ "grad_norm": 15.696857452392578,
63
+ "learning_rate": 1.307300509337861e-06,
64
+ "loss": 3.8028,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.015295717199184228,
69
+ "grad_norm": 10.482597351074219,
70
+ "learning_rate": 1.477079796264856e-06,
71
+ "loss": 3.8624,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.01699524133242692,
76
+ "grad_norm": 13.150389671325684,
77
+ "learning_rate": 1.6468590831918508e-06,
78
+ "loss": 3.5161,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.018694765465669613,
83
+ "grad_norm": 11.572787284851074,
84
+ "learning_rate": 1.8166383701188457e-06,
85
+ "loss": 3.4791,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.020394289598912305,
90
+ "grad_norm": 13.110374450683594,
91
+ "learning_rate": 1.9864176570458403e-06,
92
+ "loss": 3.6538,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.022093813732154997,
97
+ "grad_norm": 10.92455768585205,
98
+ "learning_rate": 2.1561969439728354e-06,
99
+ "loss": 2.8184,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.02379333786539769,
104
+ "grad_norm": 13.158881187438965,
105
+ "learning_rate": 2.3259762308998304e-06,
106
+ "loss": 3.321,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.02549286199864038,
111
+ "grad_norm": 7.724843502044678,
112
+ "learning_rate": 2.4957555178268255e-06,
113
+ "loss": 2.7731,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.027192386131883073,
118
+ "grad_norm": 9.516618728637695,
119
+ "learning_rate": 2.6655348047538205e-06,
120
+ "loss": 3.0967,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.028891910265125765,
125
+ "grad_norm": 8.883516311645508,
126
+ "learning_rate": 2.835314091680815e-06,
127
+ "loss": 2.8666,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.030591434398368457,
132
+ "grad_norm": 7.585137844085693,
133
+ "learning_rate": 3.0050933786078102e-06,
134
+ "loss": 2.2543,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.032290958531611146,
139
+ "grad_norm": 11.794560432434082,
140
+ "learning_rate": 3.174872665534805e-06,
141
+ "loss": 3.1811,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.03399048266485384,
146
+ "grad_norm": 8.631832122802734,
147
+ "learning_rate": 3.3446519524618e-06,
148
+ "loss": 2.5646,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.03569000679809653,
153
+ "grad_norm": 7.68147611618042,
154
+ "learning_rate": 3.5144312393887946e-06,
155
+ "loss": 2.8453,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.037389530931339225,
160
+ "grad_norm": 9.345367431640625,
161
+ "learning_rate": 3.6842105263157896e-06,
162
+ "loss": 3.0061,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.039089055064581914,
167
+ "grad_norm": 10.231043815612793,
168
+ "learning_rate": 3.853989813242784e-06,
169
+ "loss": 2.5211,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.04078857919782461,
174
+ "grad_norm": 6.374863147735596,
175
+ "learning_rate": 4.02376910016978e-06,
176
+ "loss": 2.6467,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.0424881033310673,
181
+ "grad_norm": 12.607449531555176,
182
+ "learning_rate": 4.193548387096774e-06,
183
+ "loss": 2.1417,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.04418762746430999,
188
+ "grad_norm": 12.550193786621094,
189
+ "learning_rate": 4.36332767402377e-06,
190
+ "loss": 2.7977,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.04588715159755268,
195
+ "grad_norm": 9.875368118286133,
196
+ "learning_rate": 4.5331069609507645e-06,
197
+ "loss": 2.8345,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.04758667573079538,
202
+ "grad_norm": 14.792099952697754,
203
+ "learning_rate": 4.702886247877759e-06,
204
+ "loss": 2.8812,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.049286199864038066,
209
+ "grad_norm": 11.13675308227539,
210
+ "learning_rate": 4.872665534804754e-06,
211
+ "loss": 2.493,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.05098572399728076,
216
+ "grad_norm": 15.685995101928711,
217
+ "learning_rate": 5.042444821731749e-06,
218
+ "loss": 2.5967,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.05268524813052345,
223
+ "grad_norm": 8.819021224975586,
224
+ "learning_rate": 5.212224108658745e-06,
225
+ "loss": 2.4955,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.054384772263766146,
230
+ "grad_norm": 12.058821678161621,
231
+ "learning_rate": 5.3820033955857386e-06,
232
+ "loss": 2.2387,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.056084296397008834,
237
+ "grad_norm": 9.35466194152832,
238
+ "learning_rate": 5.551782682512734e-06,
239
+ "loss": 2.6786,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.05778382053025153,
244
+ "grad_norm": 9.817248344421387,
245
+ "learning_rate": 5.721561969439729e-06,
246
+ "loss": 2.2292,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.05948334466349422,
251
+ "grad_norm": 7.38469934463501,
252
+ "learning_rate": 5.891341256366724e-06,
253
+ "loss": 1.8386,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.061182868796736914,
258
+ "grad_norm": 7.639986038208008,
259
+ "learning_rate": 6.061120543293718e-06,
260
+ "loss": 2.5317,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.0628823929299796,
265
+ "grad_norm": 14.127429962158203,
266
+ "learning_rate": 6.2308998302207134e-06,
267
+ "loss": 2.3225,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.06458191706322229,
272
+ "grad_norm": 6.480667591094971,
273
+ "learning_rate": 6.400679117147709e-06,
274
+ "loss": 2.5349,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.066281441196465,
279
+ "grad_norm": 12.269805908203125,
280
+ "learning_rate": 6.5704584040747036e-06,
281
+ "loss": 2.5192,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.06798096532970768,
286
+ "grad_norm": 10.89499568939209,
287
+ "learning_rate": 6.740237691001699e-06,
288
+ "loss": 2.6806,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.06968048946295037,
293
+ "grad_norm": 6.393769264221191,
294
+ "learning_rate": 6.910016977928693e-06,
295
+ "loss": 2.593,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.07138001359619306,
300
+ "grad_norm": 10.500598907470703,
301
+ "learning_rate": 7.079796264855688e-06,
302
+ "loss": 1.893,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.07307953772943576,
307
+ "grad_norm": 9.394804954528809,
308
+ "learning_rate": 7.249575551782683e-06,
309
+ "loss": 2.4957,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.07477906186267845,
314
+ "grad_norm": 8.060555458068848,
315
+ "learning_rate": 7.4193548387096784e-06,
316
+ "loss": 2.2887,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.07647858599592114,
321
+ "grad_norm": 7.361180305480957,
322
+ "learning_rate": 7.589134125636672e-06,
323
+ "loss": 1.8902,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.07817811012916383,
328
+ "grad_norm": 9.406976699829102,
329
+ "learning_rate": 7.758913412563669e-06,
330
+ "loss": 2.445,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.07987763426240653,
335
+ "grad_norm": 6.804340362548828,
336
+ "learning_rate": 7.928692699490664e-06,
337
+ "loss": 2.3188,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.08157715839564922,
342
+ "grad_norm": 8.055757522583008,
343
+ "learning_rate": 8.098471986417658e-06,
344
+ "loss": 1.7857,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 0.08327668252889191,
349
+ "grad_norm": 14.029518127441406,
350
+ "learning_rate": 8.268251273344653e-06,
351
+ "loss": 1.9323,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 0.0849762066621346,
356
+ "grad_norm": 7.238176345825195,
357
+ "learning_rate": 8.438030560271647e-06,
358
+ "loss": 2.2119,
359
+ "step": 500
360
+ },
361
+ {
362
+ "epoch": 0.0849762066621346,
363
+ "eval_cosine_accuracy@1": 0.644,
364
+ "eval_cosine_accuracy@10": 0.89,
365
+ "eval_cosine_accuracy@3": 0.7905,
366
+ "eval_cosine_accuracy@5": 0.842,
367
+ "eval_cosine_map@100": 0.7319568991974511,
368
+ "eval_cosine_mrr@10": 0.727909523809523,
369
+ "eval_cosine_ndcg@10": 0.7672958038269853,
370
+ "eval_cosine_precision@1": 0.644,
371
+ "eval_cosine_precision@10": 0.089,
372
+ "eval_cosine_precision@3": 0.2635,
373
+ "eval_cosine_precision@5": 0.1684,
374
+ "eval_cosine_recall@1": 0.644,
375
+ "eval_cosine_recall@10": 0.89,
376
+ "eval_cosine_recall@3": 0.7905,
377
+ "eval_cosine_recall@5": 0.842,
378
+ "eval_loss": 2.020264148712158,
379
+ "eval_runtime": 2.7651,
380
+ "eval_samples_per_second": 272.684,
381
+ "eval_sequential_score": 0.7319568991974511,
382
+ "eval_steps_per_second": 2.17,
383
+ "eval_sts-dev_pearson_cosine": 0.7945015650907715,
384
+ "eval_sts-dev_pearson_dot": 0.7892461139806375,
385
+ "eval_sts-dev_pearson_euclidean": 0.7772212795710262,
386
+ "eval_sts-dev_pearson_manhattan": 0.7758198419084321,
387
+ "eval_sts-dev_pearson_max": 0.7945015650907715,
388
+ "eval_sts-dev_spearman_cosine": 0.8029369749021982,
389
+ "eval_sts-dev_spearman_dot": 0.7981715789995407,
390
+ "eval_sts-dev_spearman_euclidean": 0.7974969539844081,
391
+ "eval_sts-dev_spearman_manhattan": 0.793442700340276,
392
+ "eval_sts-dev_spearman_max": 0.8029369749021982,
393
+ "step": 500
394
+ },
395
+ {
396
+ "epoch": 0.0866757307953773,
397
+ "grad_norm": 8.096364974975586,
398
+ "learning_rate": 8.607809847198643e-06,
399
+ "loss": 2.4202,
400
+ "step": 510
401
+ },
402
+ {
403
+ "epoch": 0.08837525492861999,
404
+ "grad_norm": 5.928977012634277,
405
+ "learning_rate": 8.777589134125636e-06,
406
+ "loss": 1.9747,
407
+ "step": 520
408
+ },
409
+ {
410
+ "epoch": 0.09007477906186268,
411
+ "grad_norm": 5.913745880126953,
412
+ "learning_rate": 8.947368421052632e-06,
413
+ "loss": 2.727,
414
+ "step": 530
415
+ },
416
+ {
417
+ "epoch": 0.09177430319510536,
418
+ "grad_norm": 11.565766334533691,
419
+ "learning_rate": 9.117147707979627e-06,
420
+ "loss": 2.2645,
421
+ "step": 540
422
+ },
423
+ {
424
+ "epoch": 0.09347382732834807,
425
+ "grad_norm": 5.834039688110352,
426
+ "learning_rate": 9.286926994906623e-06,
427
+ "loss": 2.5484,
428
+ "step": 550
429
+ },
430
+ {
431
+ "epoch": 0.09517335146159075,
432
+ "grad_norm": 9.030844688415527,
433
+ "learning_rate": 9.456706281833618e-06,
434
+ "loss": 2.0806,
435
+ "step": 560
436
+ },
437
+ {
438
+ "epoch": 0.09687287559483344,
439
+ "grad_norm": 8.642946243286133,
440
+ "learning_rate": 9.626485568760612e-06,
441
+ "loss": 2.4332,
442
+ "step": 570
443
+ },
444
+ {
445
+ "epoch": 0.09857239972807613,
446
+ "grad_norm": 8.626252174377441,
447
+ "learning_rate": 9.796264855687608e-06,
448
+ "loss": 1.866,
449
+ "step": 580
450
+ },
451
+ {
452
+ "epoch": 0.10027192386131883,
453
+ "grad_norm": 7.73045539855957,
454
+ "learning_rate": 9.966044142614601e-06,
455
+ "loss": 2.1285,
456
+ "step": 590
457
+ },
458
+ {
459
+ "epoch": 0.10197144799456152,
460
+ "grad_norm": 8.957172393798828,
461
+ "learning_rate": 9.999943676843767e-06,
462
+ "loss": 2.3799,
463
+ "step": 600
464
+ },
465
+ {
466
+ "epoch": 0.10367097212780421,
467
+ "grad_norm": 7.423664093017578,
468
+ "learning_rate": 9.999714866196319e-06,
469
+ "loss": 2.6725,
470
+ "step": 610
471
+ },
472
+ {
473
+ "epoch": 0.1053704962610469,
474
+ "grad_norm": 9.436025619506836,
475
+ "learning_rate": 9.999310055908776e-06,
476
+ "loss": 2.4767,
477
+ "step": 620
478
+ },
479
+ {
480
+ "epoch": 0.1070700203942896,
481
+ "grad_norm": 8.621382713317871,
482
+ "learning_rate": 9.998729260231243e-06,
483
+ "loss": 1.9994,
484
+ "step": 630
485
+ },
486
+ {
487
+ "epoch": 0.10876954452753229,
488
+ "grad_norm": 8.81654167175293,
489
+ "learning_rate": 9.997972499608852e-06,
490
+ "loss": 2.6699,
491
+ "step": 640
492
+ },
493
+ {
494
+ "epoch": 0.11046906866077498,
495
+ "grad_norm": 6.4313154220581055,
496
+ "learning_rate": 9.997039800681044e-06,
497
+ "loss": 2.2264,
498
+ "step": 650
499
+ },
500
+ {
501
+ "epoch": 0.11216859279401767,
502
+ "grad_norm": 8.586142539978027,
503
+ "learning_rate": 9.995931196280622e-06,
504
+ "loss": 2.0675,
505
+ "step": 660
506
+ },
507
+ {
508
+ "epoch": 0.11386811692726037,
509
+ "grad_norm": 6.31871223449707,
510
+ "learning_rate": 9.994646725432611e-06,
511
+ "loss": 1.7862,
512
+ "step": 670
513
+ },
514
+ {
515
+ "epoch": 0.11556764106050306,
516
+ "grad_norm": 9.907938957214355,
517
+ "learning_rate": 9.993186433352867e-06,
518
+ "loss": 2.5078,
519
+ "step": 680
520
+ },
521
+ {
522
+ "epoch": 0.11726716519374575,
523
+ "grad_norm": 5.77314567565918,
524
+ "learning_rate": 9.9915503714465e-06,
525
+ "loss": 2.135,
526
+ "step": 690
527
+ },
528
+ {
529
+ "epoch": 0.11896668932698844,
530
+ "grad_norm": 15.68774700164795,
531
+ "learning_rate": 9.989738597306053e-06,
532
+ "loss": 2.5356,
533
+ "step": 700
534
+ },
535
+ {
536
+ "epoch": 0.12066621346023114,
537
+ "grad_norm": 13.062637329101562,
538
+ "learning_rate": 9.987751174709489e-06,
539
+ "loss": 2.4131,
540
+ "step": 710
541
+ },
542
+ {
543
+ "epoch": 0.12236573759347383,
544
+ "grad_norm": 6.619427680969238,
545
+ "learning_rate": 9.98558817361792e-06,
546
+ "loss": 2.0465,
547
+ "step": 720
548
+ },
549
+ {
550
+ "epoch": 0.12406526172671652,
551
+ "grad_norm": 10.573486328125,
552
+ "learning_rate": 9.98324967017318e-06,
553
+ "loss": 2.5195,
554
+ "step": 730
555
+ },
556
+ {
557
+ "epoch": 0.1257647858599592,
558
+ "grad_norm": 10.98731803894043,
559
+ "learning_rate": 9.98073574669511e-06,
560
+ "loss": 2.3977,
561
+ "step": 740
562
+ },
563
+ {
564
+ "epoch": 0.1274643099932019,
565
+ "grad_norm": 7.568258285522461,
566
+ "learning_rate": 9.978046491678686e-06,
567
+ "loss": 1.8389,
568
+ "step": 750
569
+ },
570
+ {
571
+ "epoch": 0.12916383412644458,
572
+ "grad_norm": 6.886653423309326,
573
+ "learning_rate": 9.975181999790888e-06,
574
+ "loss": 2.1653,
575
+ "step": 760
576
+ },
577
+ {
578
+ "epoch": 0.13086335825968728,
579
+ "grad_norm": 8.51340103149414,
580
+ "learning_rate": 9.972142371867375e-06,
581
+ "loss": 2.3171,
582
+ "step": 770
583
+ },
584
+ {
585
+ "epoch": 0.13256288239293,
586
+ "grad_norm": 13.55966567993164,
587
+ "learning_rate": 9.968927714908934e-06,
588
+ "loss": 2.3126,
589
+ "step": 780
590
+ },
591
+ {
592
+ "epoch": 0.13426240652617266,
593
+ "grad_norm": 6.183629035949707,
594
+ "learning_rate": 9.965538142077709e-06,
595
+ "loss": 1.8593,
596
+ "step": 790
597
+ },
598
+ {
599
+ "epoch": 0.13596193065941536,
600
+ "grad_norm": 8.71070671081543,
601
+ "learning_rate": 9.961973772693226e-06,
602
+ "loss": 2.3248,
603
+ "step": 800
604
+ },
605
+ {
606
+ "epoch": 0.13766145479265807,
607
+ "grad_norm": 6.578171253204346,
608
+ "learning_rate": 9.958234732228184e-06,
609
+ "loss": 1.4786,
610
+ "step": 810
611
+ },
612
+ {
613
+ "epoch": 0.13936097892590074,
614
+ "grad_norm": 8.017528533935547,
615
+ "learning_rate": 9.954321152304049e-06,
616
+ "loss": 2.5924,
617
+ "step": 820
618
+ },
619
+ {
620
+ "epoch": 0.14106050305914344,
621
+ "grad_norm": 9.630891799926758,
622
+ "learning_rate": 9.950233170686404e-06,
623
+ "loss": 2.7052,
624
+ "step": 830
625
+ },
626
+ {
627
+ "epoch": 0.14276002719238612,
628
+ "grad_norm": 9.43925952911377,
629
+ "learning_rate": 9.945970931280117e-06,
630
+ "loss": 2.9104,
631
+ "step": 840
632
+ },
633
+ {
634
+ "epoch": 0.14445955132562882,
635
+ "grad_norm": 7.2036967277526855,
636
+ "learning_rate": 9.941534584124262e-06,
637
+ "loss": 1.905,
638
+ "step": 850
639
+ },
640
+ {
641
+ "epoch": 0.14615907545887152,
642
+ "grad_norm": 8.345267295837402,
643
+ "learning_rate": 9.936924285386849e-06,
644
+ "loss": 1.4475,
645
+ "step": 860
646
+ },
647
+ {
648
+ "epoch": 0.1478585995921142,
649
+ "grad_norm": 6.2061614990234375,
650
+ "learning_rate": 9.932140197359312e-06,
651
+ "loss": 1.9715,
652
+ "step": 870
653
+ },
654
+ {
655
+ "epoch": 0.1495581237253569,
656
+ "grad_norm": 8.147466659545898,
657
+ "learning_rate": 9.927182488450813e-06,
658
+ "loss": 1.922,
659
+ "step": 880
660
+ },
661
+ {
662
+ "epoch": 0.1512576478585996,
663
+ "grad_norm": 12.269794464111328,
664
+ "learning_rate": 9.9220513331823e-06,
665
+ "loss": 2.5347,
666
+ "step": 890
667
+ },
668
+ {
669
+ "epoch": 0.15295717199184228,
670
+ "grad_norm": 4.720547199249268,
671
+ "learning_rate": 9.916746912180369e-06,
672
+ "loss": 1.9091,
673
+ "step": 900
674
+ },
675
+ {
676
+ "epoch": 0.15465669612508498,
677
+ "grad_norm": 10.795013427734375,
678
+ "learning_rate": 9.911269412170906e-06,
679
+ "loss": 2.127,
680
+ "step": 910
681
+ },
682
+ {
683
+ "epoch": 0.15635622025832766,
684
+ "grad_norm": 9.331660270690918,
685
+ "learning_rate": 9.905619025972513e-06,
686
+ "loss": 2.0569,
687
+ "step": 920
688
+ },
689
+ {
690
+ "epoch": 0.15805574439157036,
691
+ "grad_norm": 8.150259017944336,
692
+ "learning_rate": 9.89979595248972e-06,
693
+ "loss": 1.5199,
694
+ "step": 930
695
+ },
696
+ {
697
+ "epoch": 0.15975526852481306,
698
+ "grad_norm": 5.694140434265137,
699
+ "learning_rate": 9.893800396705985e-06,
700
+ "loss": 2.1115,
701
+ "step": 940
702
+ },
703
+ {
704
+ "epoch": 0.16145479265805573,
705
+ "grad_norm": 6.206644535064697,
706
+ "learning_rate": 9.887632569676475e-06,
707
+ "loss": 2.3499,
708
+ "step": 950
709
+ },
710
+ {
711
+ "epoch": 0.16315431679129844,
712
+ "grad_norm": 4.6663408279418945,
713
+ "learning_rate": 9.881292688520638e-06,
714
+ "loss": 2.2428,
715
+ "step": 960
716
+ },
717
+ {
718
+ "epoch": 0.16485384092454114,
719
+ "grad_norm": 7.22851037979126,
720
+ "learning_rate": 9.874780976414565e-06,
721
+ "loss": 2.3808,
722
+ "step": 970
723
+ },
724
+ {
725
+ "epoch": 0.16655336505778381,
726
+ "grad_norm": 5.55798864364624,
727
+ "learning_rate": 9.86809766258312e-06,
728
+ "loss": 2.0025,
729
+ "step": 980
730
+ },
731
+ {
732
+ "epoch": 0.16825288919102652,
733
+ "grad_norm": 5.861523628234863,
734
+ "learning_rate": 9.86124298229189e-06,
735
+ "loss": 2.0427,
736
+ "step": 990
737
+ },
738
+ {
739
+ "epoch": 0.1699524133242692,
740
+ "grad_norm": 6.302640438079834,
741
+ "learning_rate": 9.854217176838886e-06,
742
+ "loss": 1.8858,
743
+ "step": 1000
744
+ },
745
+ {
746
+ "epoch": 0.1699524133242692,
747
+ "eval_cosine_accuracy@1": 0.682,
748
+ "eval_cosine_accuracy@10": 0.906,
749
+ "eval_cosine_accuracy@3": 0.8185,
750
+ "eval_cosine_accuracy@5": 0.858,
751
+ "eval_cosine_map@100": 0.762636433194321,
752
+ "eval_cosine_mrr@10": 0.7592498015873005,
753
+ "eval_cosine_ndcg@10": 0.7948932488118442,
754
+ "eval_cosine_precision@1": 0.682,
755
+ "eval_cosine_precision@10": 0.09060000000000001,
756
+ "eval_cosine_precision@3": 0.2728333333333333,
757
+ "eval_cosine_precision@5": 0.1716,
758
+ "eval_cosine_recall@1": 0.682,
759
+ "eval_cosine_recall@10": 0.906,
760
+ "eval_cosine_recall@3": 0.8185,
761
+ "eval_cosine_recall@5": 0.858,
762
+ "eval_loss": 1.8376002311706543,
763
+ "eval_runtime": 2.7558,
764
+ "eval_samples_per_second": 273.604,
765
+ "eval_sequential_score": 0.762636433194321,
766
+ "eval_steps_per_second": 2.177,
767
+ "eval_sts-dev_pearson_cosine": 0.7985157354895591,
768
+ "eval_sts-dev_pearson_dot": 0.7942162925984169,
769
+ "eval_sts-dev_pearson_euclidean": 0.7855985286403331,
770
+ "eval_sts-dev_pearson_manhattan": 0.7854261466194248,
771
+ "eval_sts-dev_pearson_max": 0.7985157354895591,
772
+ "eval_sts-dev_spearman_cosine": 0.8055793147203388,
773
+ "eval_sts-dev_spearman_dot": 0.8010661505883895,
774
+ "eval_sts-dev_spearman_euclidean": 0.8034068438279502,
775
+ "eval_sts-dev_spearman_manhattan": 0.8018568915859646,
776
+ "eval_sts-dev_spearman_max": 0.8055793147203388,
777
+ "step": 1000
778
+ },
779
+ {
780
+ "epoch": 0.1716519374575119,
781
+ "grad_norm": 7.927140235900879,
782
+ "learning_rate": 9.847020493546058e-06,
783
+ "loss": 1.9554,
784
+ "step": 1010
785
+ },
786
+ {
787
+ "epoch": 0.1733514615907546,
788
+ "grad_norm": 8.299236297607422,
789
+ "learning_rate": 9.839653185750588e-06,
790
+ "loss": 2.2341,
791
+ "step": 1020
792
+ },
793
+ {
794
+ "epoch": 0.17505098572399727,
795
+ "grad_norm": 8.708532333374023,
796
+ "learning_rate": 9.83211551279597e-06,
797
+ "loss": 2.4421,
798
+ "step": 1030
799
+ },
800
+ {
801
+ "epoch": 0.17675050985723997,
802
+ "grad_norm": 9.657612800598145,
803
+ "learning_rate": 9.824407740022878e-06,
804
+ "loss": 1.8227,
805
+ "step": 1040
806
+ },
807
+ {
808
+ "epoch": 0.17845003399048268,
809
+ "grad_norm": 6.149839401245117,
810
+ "learning_rate": 9.816530138759837e-06,
811
+ "loss": 2.1157,
812
+ "step": 1050
813
+ },
814
+ {
815
+ "epoch": 0.18014955812372535,
816
+ "grad_norm": 5.039497375488281,
817
+ "learning_rate": 9.808482986313653e-06,
818
+ "loss": 1.7899,
819
+ "step": 1060
820
+ },
821
+ {
822
+ "epoch": 0.18184908225696805,
823
+ "grad_norm": 8.380406379699707,
824
+ "learning_rate": 9.80026656595967e-06,
825
+ "loss": 1.8153,
826
+ "step": 1070
827
+ },
828
+ {
829
+ "epoch": 0.18354860639021073,
830
+ "grad_norm": 5.239027500152588,
831
+ "learning_rate": 9.791881166931788e-06,
832
+ "loss": 2.0095,
833
+ "step": 1080
834
+ },
835
+ {
836
+ "epoch": 0.18524813052345343,
837
+ "grad_norm": 6.085160732269287,
838
+ "learning_rate": 9.783327084412277e-06,
839
+ "loss": 1.5087,
840
+ "step": 1090
841
+ },
842
+ {
843
+ "epoch": 0.18694765465669613,
844
+ "grad_norm": 5.803791522979736,
845
+ "learning_rate": 9.774604619521404e-06,
846
+ "loss": 1.9106,
847
+ "step": 1100
848
+ },
849
+ {
850
+ "epoch": 0.1886471787899388,
851
+ "grad_norm": 5.130899906158447,
852
+ "learning_rate": 9.765714079306814e-06,
853
+ "loss": 2.0189,
854
+ "step": 1110
855
+ },
856
+ {
857
+ "epoch": 0.1903467029231815,
858
+ "grad_norm": 13.715828895568848,
859
+ "learning_rate": 9.756655776732727e-06,
860
+ "loss": 2.3282,
861
+ "step": 1120
862
+ },
863
+ {
864
+ "epoch": 0.1920462270564242,
865
+ "grad_norm": 9.30579662322998,
866
+ "learning_rate": 9.74743003066893e-06,
867
+ "loss": 2.147,
868
+ "step": 1130
869
+ },
870
+ {
871
+ "epoch": 0.1937457511896669,
872
+ "grad_norm": 6.4321675300598145,
873
+ "learning_rate": 9.738037165879538e-06,
874
+ "loss": 1.9017,
875
+ "step": 1140
876
+ },
877
+ {
878
+ "epoch": 0.1954452753229096,
879
+ "grad_norm": 5.368739128112793,
880
+ "learning_rate": 9.72847751301157e-06,
881
+ "loss": 2.102,
882
+ "step": 1150
883
+ },
884
+ {
885
+ "epoch": 0.19714479945615226,
886
+ "grad_norm": 5.458316802978516,
887
+ "learning_rate": 9.718751408583312e-06,
888
+ "loss": 2.1275,
889
+ "step": 1160
890
+ },
891
+ {
892
+ "epoch": 0.19884432358939497,
893
+ "grad_norm": 8.618191719055176,
894
+ "learning_rate": 9.708859194972462e-06,
895
+ "loss": 2.6718,
896
+ "step": 1170
897
+ },
898
+ {
899
+ "epoch": 0.20054384772263767,
900
+ "grad_norm": 7.549973487854004,
901
+ "learning_rate": 9.698801220404086e-06,
902
+ "loss": 1.8549,
903
+ "step": 1180
904
+ },
905
+ {
906
+ "epoch": 0.20224337185588034,
907
+ "grad_norm": 5.044092655181885,
908
+ "learning_rate": 9.688577838938358e-06,
909
+ "loss": 2.1802,
910
+ "step": 1190
911
+ },
912
+ {
913
+ "epoch": 0.20394289598912305,
914
+ "grad_norm": 5.195682048797607,
915
+ "learning_rate": 9.67818941045809e-06,
916
+ "loss": 2.2905,
917
+ "step": 1200
918
+ },
919
+ {
920
+ "epoch": 0.20564242012236575,
921
+ "grad_norm": 11.789019584655762,
922
+ "learning_rate": 9.667636300656073e-06,
923
+ "loss": 2.283,
924
+ "step": 1210
925
+ },
926
+ {
927
+ "epoch": 0.20734194425560842,
928
+ "grad_norm": 7.097940921783447,
929
+ "learning_rate": 9.656918881022196e-06,
930
+ "loss": 2.1976,
931
+ "step": 1220
932
+ },
933
+ {
934
+ "epoch": 0.20904146838885113,
935
+ "grad_norm": 10.327863693237305,
936
+ "learning_rate": 9.646037528830374e-06,
937
+ "loss": 2.5894,
938
+ "step": 1230
939
+ },
940
+ {
941
+ "epoch": 0.2107409925220938,
942
+ "grad_norm": 6.603916645050049,
943
+ "learning_rate": 9.634992627125264e-06,
944
+ "loss": 1.7659,
945
+ "step": 1240
946
+ },
947
+ {
948
+ "epoch": 0.2124405166553365,
949
+ "grad_norm": 4.55158805847168,
950
+ "learning_rate": 9.623784564708782e-06,
951
+ "loss": 1.9581,
952
+ "step": 1250
953
+ },
954
+ {
955
+ "epoch": 0.2141400407885792,
956
+ "grad_norm": 6.308363437652588,
957
+ "learning_rate": 9.612413736126421e-06,
958
+ "loss": 2.1419,
959
+ "step": 1260
960
+ },
961
+ {
962
+ "epoch": 0.21583956492182188,
963
+ "grad_norm": 5.769819259643555,
964
+ "learning_rate": 9.600880541653352e-06,
965
+ "loss": 1.7988,
966
+ "step": 1270
967
+ },
968
+ {
969
+ "epoch": 0.21753908905506458,
970
+ "grad_norm": 9.009718894958496,
971
+ "learning_rate": 9.589185387280343e-06,
972
+ "loss": 2.3537,
973
+ "step": 1280
974
+ },
975
+ {
976
+ "epoch": 0.21923861318830729,
977
+ "grad_norm": 10.911920547485352,
978
+ "learning_rate": 9.577328684699468e-06,
979
+ "loss": 2.2776,
980
+ "step": 1290
981
+ },
982
+ {
983
+ "epoch": 0.22093813732154996,
984
+ "grad_norm": 7.810164928436279,
985
+ "learning_rate": 9.565310851289602e-06,
986
+ "loss": 2.0633,
987
+ "step": 1300
988
+ },
989
+ {
990
+ "epoch": 0.22263766145479266,
991
+ "grad_norm": 7.256144046783447,
992
+ "learning_rate": 9.553132310101741e-06,
993
+ "loss": 2.247,
994
+ "step": 1310
995
+ },
996
+ {
997
+ "epoch": 0.22433718558803534,
998
+ "grad_norm": 8.476386070251465,
999
+ "learning_rate": 9.540793489844106e-06,
1000
+ "loss": 1.6592,
1001
+ "step": 1320
1002
+ },
1003
+ {
1004
+ "epoch": 0.22603670972127804,
1005
+ "grad_norm": 7.557313919067383,
1006
+ "learning_rate": 9.528294824867054e-06,
1007
+ "loss": 1.8965,
1008
+ "step": 1330
1009
+ },
1010
+ {
1011
+ "epoch": 0.22773623385452074,
1012
+ "grad_norm": 5.973213195800781,
1013
+ "learning_rate": 9.515636755147781e-06,
1014
+ "loss": 2.3415,
1015
+ "step": 1340
1016
+ },
1017
+ {
1018
+ "epoch": 0.22943575798776342,
1019
+ "grad_norm": 6.3328471183776855,
1020
+ "learning_rate": 9.50281972627484e-06,
1021
+ "loss": 1.7645,
1022
+ "step": 1350
1023
+ },
1024
+ {
1025
+ "epoch": 0.23113528212100612,
1026
+ "grad_norm": 12.455780982971191,
1027
+ "learning_rate": 9.489844189432456e-06,
1028
+ "loss": 1.9076,
1029
+ "step": 1360
1030
+ },
1031
+ {
1032
+ "epoch": 0.23283480625424882,
1033
+ "grad_norm": 6.662944316864014,
1034
+ "learning_rate": 9.476710601384639e-06,
1035
+ "loss": 2.0724,
1036
+ "step": 1370
1037
+ },
1038
+ {
1039
+ "epoch": 0.2345343303874915,
1040
+ "grad_norm": 8.458195686340332,
1041
+ "learning_rate": 9.463419424459108e-06,
1042
+ "loss": 2.0766,
1043
+ "step": 1380
1044
+ },
1045
+ {
1046
+ "epoch": 0.2362338545207342,
1047
+ "grad_norm": 5.073389530181885,
1048
+ "learning_rate": 9.449971126531015e-06,
1049
+ "loss": 1.6035,
1050
+ "step": 1390
1051
+ },
1052
+ {
1053
+ "epoch": 0.23793337865397687,
1054
+ "grad_norm": 9.805305480957031,
1055
+ "learning_rate": 9.436366181006476e-06,
1056
+ "loss": 2.1389,
1057
+ "step": 1400
1058
+ },
1059
+ {
1060
+ "epoch": 0.23963290278721958,
1061
+ "grad_norm": 5.322582244873047,
1062
+ "learning_rate": 9.422605066805906e-06,
1063
+ "loss": 1.8332,
1064
+ "step": 1410
1065
+ },
1066
+ {
1067
+ "epoch": 0.24133242692046228,
1068
+ "grad_norm": 6.252927780151367,
1069
+ "learning_rate": 9.408688268347157e-06,
1070
+ "loss": 1.8316,
1071
+ "step": 1420
1072
+ },
1073
+ {
1074
+ "epoch": 0.24303195105370495,
1075
+ "grad_norm": 6.532680034637451,
1076
+ "learning_rate": 9.394616275528475e-06,
1077
+ "loss": 1.754,
1078
+ "step": 1430
1079
+ },
1080
+ {
1081
+ "epoch": 0.24473147518694766,
1082
+ "grad_norm": 10.015703201293945,
1083
+ "learning_rate": 9.380389583711238e-06,
1084
+ "loss": 2.0633,
1085
+ "step": 1440
1086
+ },
1087
+ {
1088
+ "epoch": 0.24643099932019036,
1089
+ "grad_norm": 4.93398904800415,
1090
+ "learning_rate": 9.366008693702535e-06,
1091
+ "loss": 1.9549,
1092
+ "step": 1450
1093
+ },
1094
+ {
1095
+ "epoch": 0.24813052345343303,
1096
+ "grad_norm": 10.338335037231445,
1097
+ "learning_rate": 9.351474111737531e-06,
1098
+ "loss": 2.1118,
1099
+ "step": 1460
1100
+ },
1101
+ {
1102
+ "epoch": 0.24983004758667574,
1103
+ "grad_norm": 6.729907512664795,
1104
+ "learning_rate": 9.336786349461637e-06,
1105
+ "loss": 2.1022,
1106
+ "step": 1470
1107
+ },
1108
+ {
1109
+ "epoch": 0.2515295717199184,
1110
+ "grad_norm": 6.920368194580078,
1111
+ "learning_rate": 9.321945923912518e-06,
1112
+ "loss": 1.7638,
1113
+ "step": 1480
1114
+ },
1115
+ {
1116
+ "epoch": 0.2532290958531611,
1117
+ "grad_norm": 8.145246505737305,
1118
+ "learning_rate": 9.306953357501874e-06,
1119
+ "loss": 2.3552,
1120
+ "step": 1490
1121
+ },
1122
+ {
1123
+ "epoch": 0.2549286199864038,
1124
+ "grad_norm": 7.566394805908203,
1125
+ "learning_rate": 9.291809177997061e-06,
1126
+ "loss": 2.3227,
1127
+ "step": 1500
1128
+ },
1129
+ {
1130
+ "epoch": 0.2549286199864038,
1131
+ "eval_cosine_accuracy@1": 0.6785,
1132
+ "eval_cosine_accuracy@10": 0.909,
1133
+ "eval_cosine_accuracy@3": 0.8225,
1134
+ "eval_cosine_accuracy@5": 0.863,
1135
+ "eval_cosine_map@100": 0.7639697235001636,
1136
+ "eval_cosine_mrr@10": 0.7603734126984125,
1137
+ "eval_cosine_ndcg@10": 0.7966821763190639,
1138
+ "eval_cosine_precision@1": 0.6785,
1139
+ "eval_cosine_precision@10": 0.09090000000000001,
1140
+ "eval_cosine_precision@3": 0.2741666666666666,
1141
+ "eval_cosine_precision@5": 0.17260000000000003,
1142
+ "eval_cosine_recall@1": 0.6785,
1143
+ "eval_cosine_recall@10": 0.909,
1144
+ "eval_cosine_recall@3": 0.8225,
1145
+ "eval_cosine_recall@5": 0.863,
1146
+ "eval_loss": 1.7280288934707642,
1147
+ "eval_runtime": 2.966,
1148
+ "eval_samples_per_second": 254.217,
1149
+ "eval_sequential_score": 0.7639697235001636,
1150
+ "eval_steps_per_second": 2.023,
1151
+ "eval_sts-dev_pearson_cosine": 0.7956596138775444,
1152
+ "eval_sts-dev_pearson_dot": 0.7898276993726723,
1153
+ "eval_sts-dev_pearson_euclidean": 0.7849266959832228,
1154
+ "eval_sts-dev_pearson_manhattan": 0.7850471763202945,
1155
+ "eval_sts-dev_pearson_max": 0.7956596138775444,
1156
+ "eval_sts-dev_spearman_cosine": 0.8035475738008022,
1157
+ "eval_sts-dev_spearman_dot": 0.7954777202652852,
1158
+ "eval_sts-dev_spearman_euclidean": 0.7982728913757071,
1159
+ "eval_sts-dev_spearman_manhattan": 0.8004630496417331,
1160
+ "eval_sts-dev_spearman_max": 0.8035475738008022,
1161
+ "step": 1500
1162
+ },
1163
+ {
1164
+ "epoch": 0.2566281441196465,
1165
+ "grad_norm": 8.947423934936523,
1166
+ "learning_rate": 9.276513918502508e-06,
1167
+ "loss": 2.3639,
1168
+ "step": 1510
1169
+ },
1170
+ {
1171
+ "epoch": 0.25832766825288916,
1172
+ "grad_norm": 7.069028377532959,
1173
+ "learning_rate": 9.261068117440955e-06,
1174
+ "loss": 1.717,
1175
+ "step": 1520
1176
+ },
1177
+ {
1178
+ "epoch": 0.26002719238613187,
1179
+ "grad_norm": 9.209027290344238,
1180
+ "learning_rate": 9.24547231853449e-06,
1181
+ "loss": 2.0703,
1182
+ "step": 1530
1183
+ },
1184
+ {
1185
+ "epoch": 0.26172671651937457,
1186
+ "grad_norm": 6.722319602966309,
1187
+ "learning_rate": 9.229727070785423e-06,
1188
+ "loss": 1.9918,
1189
+ "step": 1540
1190
+ },
1191
+ {
1192
+ "epoch": 0.26342624065261727,
1193
+ "grad_norm": 7.064426422119141,
1194
+ "learning_rate": 9.21383292845695e-06,
1195
+ "loss": 2.0636,
1196
+ "step": 1550
1197
+ },
1198
+ {
1199
+ "epoch": 0.26512576478586,
1200
+ "grad_norm": 5.03215217590332,
1201
+ "learning_rate": 9.197790451053641e-06,
1202
+ "loss": 1.7062,
1203
+ "step": 1560
1204
+ },
1205
+ {
1206
+ "epoch": 0.2668252889191027,
1207
+ "grad_norm": 8.875913619995117,
1208
+ "learning_rate": 9.18160020330175e-06,
1209
+ "loss": 2.2969,
1210
+ "step": 1570
1211
+ },
1212
+ {
1213
+ "epoch": 0.2685248130523453,
1214
+ "grad_norm": 8.441971778869629,
1215
+ "learning_rate": 9.165262755129337e-06,
1216
+ "loss": 1.9633,
1217
+ "step": 1580
1218
+ },
1219
+ {
1220
+ "epoch": 0.270224337185588,
1221
+ "grad_norm": 6.123201847076416,
1222
+ "learning_rate": 9.148778681646196e-06,
1223
+ "loss": 1.8068,
1224
+ "step": 1590
1225
+ },
1226
+ {
1227
+ "epoch": 0.27192386131883073,
1228
+ "grad_norm": 4.153637886047363,
1229
+ "learning_rate": 9.132148563123617e-06,
1230
+ "loss": 1.6155,
1231
+ "step": 1600
1232
+ },
1233
+ {
1234
+ "epoch": 0.27362338545207343,
1235
+ "grad_norm": 5.999012470245361,
1236
+ "learning_rate": 9.115372984973963e-06,
1237
+ "loss": 1.6978,
1238
+ "step": 1610
1239
+ },
1240
+ {
1241
+ "epoch": 0.27532290958531613,
1242
+ "grad_norm": 7.118943214416504,
1243
+ "learning_rate": 9.098452537730049e-06,
1244
+ "loss": 2.1703,
1245
+ "step": 1620
1246
+ },
1247
+ {
1248
+ "epoch": 0.2770224337185588,
1249
+ "grad_norm": 7.293047904968262,
1250
+ "learning_rate": 9.081387817024371e-06,
1251
+ "loss": 2.5395,
1252
+ "step": 1630
1253
+ },
1254
+ {
1255
+ "epoch": 0.2787219578518015,
1256
+ "grad_norm": 4.569684982299805,
1257
+ "learning_rate": 9.064179423568122e-06,
1258
+ "loss": 1.7744,
1259
+ "step": 1640
1260
+ },
1261
+ {
1262
+ "epoch": 0.2804214819850442,
1263
+ "grad_norm": 5.9083404541015625,
1264
+ "learning_rate": 9.046827963130063e-06,
1265
+ "loss": 1.7637,
1266
+ "step": 1650
1267
+ },
1268
+ {
1269
+ "epoch": 0.2821210061182869,
1270
+ "grad_norm": 7.332223892211914,
1271
+ "learning_rate": 9.02933404651518e-06,
1272
+ "loss": 1.9622,
1273
+ "step": 1660
1274
+ },
1275
+ {
1276
+ "epoch": 0.2838205302515296,
1277
+ "grad_norm": 6.511028289794922,
1278
+ "learning_rate": 9.011698289543199e-06,
1279
+ "loss": 1.3597,
1280
+ "step": 1670
1281
+ },
1282
+ {
1283
+ "epoch": 0.28552005438477224,
1284
+ "grad_norm": 6.4721832275390625,
1285
+ "learning_rate": 8.9939213130269e-06,
1286
+ "loss": 1.6605,
1287
+ "step": 1680
1288
+ },
1289
+ {
1290
+ "epoch": 0.28721957851801494,
1291
+ "grad_norm": 7.247612953186035,
1292
+ "learning_rate": 8.97600374275026e-06,
1293
+ "loss": 2.166,
1294
+ "step": 1690
1295
+ },
1296
+ {
1297
+ "epoch": 0.28891910265125764,
1298
+ "grad_norm": 10.479696273803711,
1299
+ "learning_rate": 8.957946209446435e-06,
1300
+ "loss": 1.959,
1301
+ "step": 1700
1302
+ },
1303
+ {
1304
+ "epoch": 0.29061862678450034,
1305
+ "grad_norm": 6.533411026000977,
1306
+ "learning_rate": 8.939749348775544e-06,
1307
+ "loss": 2.0912,
1308
+ "step": 1710
1309
+ },
1310
+ {
1311
+ "epoch": 0.29231815091774305,
1312
+ "grad_norm": 8.847211837768555,
1313
+ "learning_rate": 8.921413801302305e-06,
1314
+ "loss": 2.0446,
1315
+ "step": 1720
1316
+ },
1317
+ {
1318
+ "epoch": 0.29401767505098575,
1319
+ "grad_norm": 6.280109882354736,
1320
+ "learning_rate": 8.902940212473477e-06,
1321
+ "loss": 2.1172,
1322
+ "step": 1730
1323
+ },
1324
+ {
1325
+ "epoch": 0.2957171991842284,
1326
+ "grad_norm": 6.145227909088135,
1327
+ "learning_rate": 8.884329232595139e-06,
1328
+ "loss": 1.8379,
1329
+ "step": 1740
1330
+ },
1331
+ {
1332
+ "epoch": 0.2974167233174711,
1333
+ "grad_norm": 6.113027095794678,
1334
+ "learning_rate": 8.865581516809806e-06,
1335
+ "loss": 2.0789,
1336
+ "step": 1750
1337
+ },
1338
+ {
1339
+ "epoch": 0.2991162474507138,
1340
+ "grad_norm": 8.188186645507812,
1341
+ "learning_rate": 8.846697725073359e-06,
1342
+ "loss": 1.8014,
1343
+ "step": 1760
1344
+ },
1345
+ {
1346
+ "epoch": 0.3008157715839565,
1347
+ "grad_norm": 7.898641109466553,
1348
+ "learning_rate": 8.827678522131815e-06,
1349
+ "loss": 1.7158,
1350
+ "step": 1770
1351
+ },
1352
+ {
1353
+ "epoch": 0.3025152957171992,
1354
+ "grad_norm": 8.399277687072754,
1355
+ "learning_rate": 8.80852457749793e-06,
1356
+ "loss": 1.785,
1357
+ "step": 1780
1358
+ },
1359
+ {
1360
+ "epoch": 0.30421481985044185,
1361
+ "grad_norm": 4.910240650177002,
1362
+ "learning_rate": 8.789236565427627e-06,
1363
+ "loss": 1.9891,
1364
+ "step": 1790
1365
+ },
1366
+ {
1367
+ "epoch": 0.30591434398368456,
1368
+ "grad_norm": 10.41693115234375,
1369
+ "learning_rate": 8.769815164896262e-06,
1370
+ "loss": 2.134,
1371
+ "step": 1800
1372
+ },
1373
+ {
1374
+ "epoch": 0.30761386811692726,
1375
+ "grad_norm": 8.666768074035645,
1376
+ "learning_rate": 8.750261059574722e-06,
1377
+ "loss": 1.8226,
1378
+ "step": 1810
1379
+ },
1380
+ {
1381
+ "epoch": 0.30931339225016996,
1382
+ "grad_norm": 4.843673229217529,
1383
+ "learning_rate": 8.730574937805363e-06,
1384
+ "loss": 1.9502,
1385
+ "step": 1820
1386
+ },
1387
+ {
1388
+ "epoch": 0.31101291638341266,
1389
+ "grad_norm": 6.370514392852783,
1390
+ "learning_rate": 8.71075749257777e-06,
1391
+ "loss": 2.0422,
1392
+ "step": 1830
1393
+ },
1394
+ {
1395
+ "epoch": 0.3127124405166553,
1396
+ "grad_norm": 4.965259552001953,
1397
+ "learning_rate": 8.690809421504375e-06,
1398
+ "loss": 1.7759,
1399
+ "step": 1840
1400
+ },
1401
+ {
1402
+ "epoch": 0.314411964649898,
1403
+ "grad_norm": 6.794595241546631,
1404
+ "learning_rate": 8.670731426795888e-06,
1405
+ "loss": 2.0413,
1406
+ "step": 1850
1407
+ },
1408
+ {
1409
+ "epoch": 0.3161114887831407,
1410
+ "grad_norm": 4.695109844207764,
1411
+ "learning_rate": 8.65052421523658e-06,
1412
+ "loss": 1.9104,
1413
+ "step": 1860
1414
+ },
1415
+ {
1416
+ "epoch": 0.3178110129163834,
1417
+ "grad_norm": 6.058196544647217,
1418
+ "learning_rate": 8.63018849815941e-06,
1419
+ "loss": 2.3626,
1420
+ "step": 1870
1421
+ },
1422
+ {
1423
+ "epoch": 0.3195105370496261,
1424
+ "grad_norm": 4.891868591308594,
1425
+ "learning_rate": 8.609724991420981e-06,
1426
+ "loss": 1.8832,
1427
+ "step": 1880
1428
+ },
1429
+ {
1430
+ "epoch": 0.3212100611828688,
1431
+ "grad_norm": 5.250426292419434,
1432
+ "learning_rate": 8.589134415376333e-06,
1433
+ "loss": 2.0618,
1434
+ "step": 1890
1435
+ },
1436
+ {
1437
+ "epoch": 0.32290958531611147,
1438
+ "grad_norm": 4.848729133605957,
1439
+ "learning_rate": 8.568417494853598e-06,
1440
+ "loss": 2.0059,
1441
+ "step": 1900
1442
+ },
1443
+ {
1444
+ "epoch": 0.32460910944935417,
1445
+ "grad_norm": 8.6367769241333,
1446
+ "learning_rate": 8.547574959128472e-06,
1447
+ "loss": 1.9333,
1448
+ "step": 1910
1449
+ },
1450
+ {
1451
+ "epoch": 0.3263086335825969,
1452
+ "grad_norm": 5.955835342407227,
1453
+ "learning_rate": 8.526607541898555e-06,
1454
+ "loss": 2.3307,
1455
+ "step": 1920
1456
+ },
1457
+ {
1458
+ "epoch": 0.3280081577158396,
1459
+ "grad_norm": 7.700503349304199,
1460
+ "learning_rate": 8.505515981257515e-06,
1461
+ "loss": 2.3562,
1462
+ "step": 1930
1463
+ },
1464
+ {
1465
+ "epoch": 0.3297076818490823,
1466
+ "grad_norm": 6.47861909866333,
1467
+ "learning_rate": 8.48430101966911e-06,
1468
+ "loss": 1.7028,
1469
+ "step": 1940
1470
+ },
1471
+ {
1472
+ "epoch": 0.3314072059823249,
1473
+ "grad_norm": 6.167905807495117,
1474
+ "learning_rate": 8.462963403941046e-06,
1475
+ "loss": 1.6633,
1476
+ "step": 1950
1477
+ },
1478
+ {
1479
+ "epoch": 0.33310673011556763,
1480
+ "grad_norm": 5.260265827178955,
1481
+ "learning_rate": 8.441503885198699e-06,
1482
+ "loss": 1.6875,
1483
+ "step": 1960
1484
+ },
1485
+ {
1486
+ "epoch": 0.33480625424881033,
1487
+ "grad_norm": 5.841998100280762,
1488
+ "learning_rate": 8.419923218858658e-06,
1489
+ "loss": 1.9344,
1490
+ "step": 1970
1491
+ },
1492
+ {
1493
+ "epoch": 0.33650577838205303,
1494
+ "grad_norm": 9.941591262817383,
1495
+ "learning_rate": 8.398222164602151e-06,
1496
+ "loss": 2.1813,
1497
+ "step": 1980
1498
+ },
1499
+ {
1500
+ "epoch": 0.33820530251529574,
1501
+ "grad_norm": 8.419736862182617,
1502
+ "learning_rate": 8.376401486348286e-06,
1503
+ "loss": 1.9077,
1504
+ "step": 1990
1505
+ },
1506
+ {
1507
+ "epoch": 0.3399048266485384,
1508
+ "grad_norm": 6.201753616333008,
1509
+ "learning_rate": 8.354461952227175e-06,
1510
+ "loss": 1.9249,
1511
+ "step": 2000
1512
+ },
1513
+ {
1514
+ "epoch": 0.3399048266485384,
1515
+ "eval_cosine_accuracy@1": 0.695,
1516
+ "eval_cosine_accuracy@10": 0.9095,
1517
+ "eval_cosine_accuracy@3": 0.831,
1518
+ "eval_cosine_accuracy@5": 0.871,
1519
+ "eval_cosine_map@100": 0.7742690226366353,
1520
+ "eval_cosine_mrr@10": 0.7707448412698402,
1521
+ "eval_cosine_ndcg@10": 0.8045879491595074,
1522
+ "eval_cosine_precision@1": 0.695,
1523
+ "eval_cosine_precision@10": 0.09095000000000002,
1524
+ "eval_cosine_precision@3": 0.277,
1525
+ "eval_cosine_precision@5": 0.17420000000000002,
1526
+ "eval_cosine_recall@1": 0.695,
1527
+ "eval_cosine_recall@10": 0.9095,
1528
+ "eval_cosine_recall@3": 0.831,
1529
+ "eval_cosine_recall@5": 0.871,
1530
+ "eval_loss": 1.7007904052734375,
1531
+ "eval_runtime": 2.7948,
1532
+ "eval_samples_per_second": 269.79,
1533
+ "eval_sequential_score": 0.7742690226366353,
1534
+ "eval_steps_per_second": 2.147,
1535
+ "eval_sts-dev_pearson_cosine": 0.7952822056751716,
1536
+ "eval_sts-dev_pearson_dot": 0.7915034361455775,
1537
+ "eval_sts-dev_pearson_euclidean": 0.7874083556892966,
1538
+ "eval_sts-dev_pearson_manhattan": 0.7873169677574228,
1539
+ "eval_sts-dev_pearson_max": 0.7952822056751716,
1540
+ "eval_sts-dev_spearman_cosine": 0.8004780454585122,
1541
+ "eval_sts-dev_spearman_dot": 0.7972326119671326,
1542
+ "eval_sts-dev_spearman_euclidean": 0.7979533651258709,
1543
+ "eval_sts-dev_spearman_manhattan": 0.7990561344213346,
1544
+ "eval_sts-dev_spearman_max": 0.8004780454585122,
1545
+ "step": 2000
1546
+ },
1547
+ {
1548
+ "epoch": 0.3416043507817811,
1549
+ "grad_norm": 5.67038106918335,
1550
+ "learning_rate": 8.332404334552882e-06,
1551
+ "loss": 1.7941,
1552
+ "step": 2010
1553
+ },
1554
+ {
1555
+ "epoch": 0.3433038749150238,
1556
+ "grad_norm": 5.878876686096191,
1557
+ "learning_rate": 8.310229409796235e-06,
1558
+ "loss": 2.0737,
1559
+ "step": 2020
1560
+ },
1561
+ {
1562
+ "epoch": 0.3450033990482665,
1563
+ "grad_norm": 5.597275733947754,
1564
+ "learning_rate": 8.287937958557513e-06,
1565
+ "loss": 1.559,
1566
+ "step": 2030
1567
+ },
1568
+ {
1569
+ "epoch": 0.3467029231815092,
1570
+ "grad_norm": 6.382761478424072,
1571
+ "learning_rate": 8.265530765538938e-06,
1572
+ "loss": 1.3173,
1573
+ "step": 2040
1574
+ },
1575
+ {
1576
+ "epoch": 0.3484024473147519,
1577
+ "grad_norm": 7.027074813842773,
1578
+ "learning_rate": 8.243008619517074e-06,
1579
+ "loss": 2.1037,
1580
+ "step": 2050
1581
+ },
1582
+ {
1583
+ "epoch": 0.35010197144799454,
1584
+ "grad_norm": 6.432947158813477,
1585
+ "learning_rate": 8.220372313315053e-06,
1586
+ "loss": 2.27,
1587
+ "step": 2060
1588
+ },
1589
+ {
1590
+ "epoch": 0.35180149558123724,
1591
+ "grad_norm": 6.3885416984558105,
1592
+ "learning_rate": 8.197622643774663e-06,
1593
+ "loss": 1.7745,
1594
+ "step": 2070
1595
+ },
1596
+ {
1597
+ "epoch": 0.35350101971447995,
1598
+ "grad_norm": 6.885595798492432,
1599
+ "learning_rate": 8.174760411728308e-06,
1600
+ "loss": 1.3386,
1601
+ "step": 2080
1602
+ },
1603
+ {
1604
+ "epoch": 0.35520054384772265,
1605
+ "grad_norm": 7.397289752960205,
1606
+ "learning_rate": 8.1517864219708e-06,
1607
+ "loss": 1.9689,
1608
+ "step": 2090
1609
+ },
1610
+ {
1611
+ "epoch": 0.35690006798096535,
1612
+ "grad_norm": 4.5459885597229,
1613
+ "learning_rate": 8.128701483231048e-06,
1614
+ "loss": 1.699,
1615
+ "step": 2100
1616
+ },
1617
+ {
1618
+ "epoch": 0.358599592114208,
1619
+ "grad_norm": 4.440462589263916,
1620
+ "learning_rate": 8.105506408143571e-06,
1621
+ "loss": 1.8324,
1622
+ "step": 2110
1623
+ },
1624
+ {
1625
+ "epoch": 0.3602991162474507,
1626
+ "grad_norm": 5.693051338195801,
1627
+ "learning_rate": 8.082202013219912e-06,
1628
+ "loss": 1.8712,
1629
+ "step": 2120
1630
+ },
1631
+ {
1632
+ "epoch": 0.3619986403806934,
1633
+ "grad_norm": 7.692087650299072,
1634
+ "learning_rate": 8.058789118819872e-06,
1635
+ "loss": 2.4458,
1636
+ "step": 2130
1637
+ },
1638
+ {
1639
+ "epoch": 0.3636981645139361,
1640
+ "grad_norm": 5.480040073394775,
1641
+ "learning_rate": 8.03526854912265e-06,
1642
+ "loss": 1.7687,
1643
+ "step": 2140
1644
+ },
1645
+ {
1646
+ "epoch": 0.3653976886471788,
1647
+ "grad_norm": 7.727152347564697,
1648
+ "learning_rate": 8.011641132097822e-06,
1649
+ "loss": 1.9936,
1650
+ "step": 2150
1651
+ },
1652
+ {
1653
+ "epoch": 0.36709721278042146,
1654
+ "grad_norm": 5.268275260925293,
1655
+ "learning_rate": 7.987907699476202e-06,
1656
+ "loss": 1.7563,
1657
+ "step": 2160
1658
+ },
1659
+ {
1660
+ "epoch": 0.36879673691366416,
1661
+ "grad_norm": 12.53956413269043,
1662
+ "learning_rate": 7.96406908672055e-06,
1663
+ "loss": 2.0094,
1664
+ "step": 2170
1665
+ },
1666
+ {
1667
+ "epoch": 0.37049626104690686,
1668
+ "grad_norm": 6.193202495574951,
1669
+ "learning_rate": 7.940126132996172e-06,
1670
+ "loss": 2.0172,
1671
+ "step": 2180
1672
+ },
1673
+ {
1674
+ "epoch": 0.37219578518014956,
1675
+ "grad_norm": 8.85962200164795,
1676
+ "learning_rate": 7.916079681141385e-06,
1677
+ "loss": 1.9722,
1678
+ "step": 2190
1679
+ },
1680
+ {
1681
+ "epoch": 0.37389530931339227,
1682
+ "grad_norm": 8.260863304138184,
1683
+ "learning_rate": 7.891930577637832e-06,
1684
+ "loss": 1.8102,
1685
+ "step": 2200
1686
+ },
1687
+ {
1688
+ "epoch": 0.37559483344663497,
1689
+ "grad_norm": 7.650066375732422,
1690
+ "learning_rate": 7.867679672580696e-06,
1691
+ "loss": 2.1561,
1692
+ "step": 2210
1693
+ },
1694
+ {
1695
+ "epoch": 0.3772943575798776,
1696
+ "grad_norm": 9.542856216430664,
1697
+ "learning_rate": 7.843327819648774e-06,
1698
+ "loss": 2.1181,
1699
+ "step": 2220
1700
+ },
1701
+ {
1702
+ "epoch": 0.3789938817131203,
1703
+ "grad_norm": 9.622944831848145,
1704
+ "learning_rate": 7.818875876074421e-06,
1705
+ "loss": 1.9387,
1706
+ "step": 2230
1707
+ },
1708
+ {
1709
+ "epoch": 0.380693405846363,
1710
+ "grad_norm": 5.241047382354736,
1711
+ "learning_rate": 7.794324702613376e-06,
1712
+ "loss": 1.7689,
1713
+ "step": 2240
1714
+ },
1715
+ {
1716
+ "epoch": 0.3823929299796057,
1717
+ "grad_norm": 10.481565475463867,
1718
+ "learning_rate": 7.769675163514463e-06,
1719
+ "loss": 1.9585,
1720
+ "step": 2250
1721
+ },
1722
+ {
1723
+ "epoch": 0.3840924541128484,
1724
+ "grad_norm": 8.667862892150879,
1725
+ "learning_rate": 7.744928126489169e-06,
1726
+ "loss": 1.5681,
1727
+ "step": 2260
1728
+ },
1729
+ {
1730
+ "epoch": 0.38579197824609107,
1731
+ "grad_norm": 5.036750793457031,
1732
+ "learning_rate": 7.720084462681092e-06,
1733
+ "loss": 2.0402,
1734
+ "step": 2270
1735
+ },
1736
+ {
1737
+ "epoch": 0.3874915023793338,
1738
+ "grad_norm": 5.431954383850098,
1739
+ "learning_rate": 7.695145046635281e-06,
1740
+ "loss": 1.78,
1741
+ "step": 2280
1742
+ },
1743
+ {
1744
+ "epoch": 0.3891910265125765,
1745
+ "grad_norm": 7.302848815917969,
1746
+ "learning_rate": 7.670110756267455e-06,
1747
+ "loss": 1.9524,
1748
+ "step": 2290
1749
+ },
1750
+ {
1751
+ "epoch": 0.3908905506458192,
1752
+ "grad_norm": 8.023348808288574,
1753
+ "learning_rate": 7.644982472833083e-06,
1754
+ "loss": 1.9678,
1755
+ "step": 2300
1756
+ },
1757
+ {
1758
+ "epoch": 0.3925900747790619,
1759
+ "grad_norm": 7.136281967163086,
1760
+ "learning_rate": 7.61976108089638e-06,
1761
+ "loss": 1.6553,
1762
+ "step": 2310
1763
+ },
1764
+ {
1765
+ "epoch": 0.39428959891230453,
1766
+ "grad_norm": 8.916802406311035,
1767
+ "learning_rate": 7.594447468299156e-06,
1768
+ "loss": 1.8378,
1769
+ "step": 2320
1770
+ },
1771
+ {
1772
+ "epoch": 0.39598912304554723,
1773
+ "grad_norm": 8.570812225341797,
1774
+ "learning_rate": 7.56904252612957e-06,
1775
+ "loss": 1.8066,
1776
+ "step": 2330
1777
+ },
1778
+ {
1779
+ "epoch": 0.39768864717878993,
1780
+ "grad_norm": 10.376533508300781,
1781
+ "learning_rate": 7.543547148690757e-06,
1782
+ "loss": 1.8585,
1783
+ "step": 2340
1784
+ },
1785
+ {
1786
+ "epoch": 0.39938817131203264,
1787
+ "grad_norm": 5.894679546356201,
1788
+ "learning_rate": 7.517962233469345e-06,
1789
+ "loss": 1.4762,
1790
+ "step": 2350
1791
+ },
1792
+ {
1793
+ "epoch": 0.40108769544527534,
1794
+ "grad_norm": 6.78580904006958,
1795
+ "learning_rate": 7.492288681103872e-06,
1796
+ "loss": 1.6828,
1797
+ "step": 2360
1798
+ },
1799
+ {
1800
+ "epoch": 0.40278721957851804,
1801
+ "grad_norm": 6.131538391113281,
1802
+ "learning_rate": 7.4665273953530695e-06,
1803
+ "loss": 1.6288,
1804
+ "step": 2370
1805
+ },
1806
+ {
1807
+ "epoch": 0.4044867437117607,
1808
+ "grad_norm": 5.721993923187256,
1809
+ "learning_rate": 7.440679283064059e-06,
1810
+ "loss": 1.7679,
1811
+ "step": 2380
1812
+ },
1813
+ {
1814
+ "epoch": 0.4061862678450034,
1815
+ "grad_norm": 6.973949909210205,
1816
+ "learning_rate": 7.41474525414042e-06,
1817
+ "loss": 2.128,
1818
+ "step": 2390
1819
+ },
1820
+ {
1821
+ "epoch": 0.4078857919782461,
1822
+ "grad_norm": 6.6178202629089355,
1823
+ "learning_rate": 7.388726221510163e-06,
1824
+ "loss": 1.3543,
1825
+ "step": 2400
1826
+ },
1827
+ {
1828
+ "epoch": 0.4095853161114888,
1829
+ "grad_norm": 5.370213985443115,
1830
+ "learning_rate": 7.3626231010935974e-06,
1831
+ "loss": 1.806,
1832
+ "step": 2410
1833
+ },
1834
+ {
1835
+ "epoch": 0.4112848402447315,
1836
+ "grad_norm": 8.617044448852539,
1837
+ "learning_rate": 7.336436811771085e-06,
1838
+ "loss": 1.8597,
1839
+ "step": 2420
1840
+ },
1841
+ {
1842
+ "epoch": 0.41298436437797414,
1843
+ "grad_norm": 12.804397583007812,
1844
+ "learning_rate": 7.310168275350692e-06,
1845
+ "loss": 2.2319,
1846
+ "step": 2430
1847
+ },
1848
+ {
1849
+ "epoch": 0.41468388851121685,
1850
+ "grad_norm": 8.611510276794434,
1851
+ "learning_rate": 7.28381841653574e-06,
1852
+ "loss": 1.9433,
1853
+ "step": 2440
1854
+ },
1855
+ {
1856
+ "epoch": 0.41638341264445955,
1857
+ "grad_norm": 5.746379375457764,
1858
+ "learning_rate": 7.257388162892261e-06,
1859
+ "loss": 1.7124,
1860
+ "step": 2450
1861
+ },
1862
+ {
1863
+ "epoch": 0.41808293677770225,
1864
+ "grad_norm": 9.15091609954834,
1865
+ "learning_rate": 7.23087844481634e-06,
1866
+ "loss": 1.702,
1867
+ "step": 2460
1868
+ },
1869
+ {
1870
+ "epoch": 0.41978246091094495,
1871
+ "grad_norm": 5.277801513671875,
1872
+ "learning_rate": 7.204290195501358e-06,
1873
+ "loss": 1.5501,
1874
+ "step": 2470
1875
+ },
1876
+ {
1877
+ "epoch": 0.4214819850441876,
1878
+ "grad_norm": 9.658906936645508,
1879
+ "learning_rate": 7.177624350905155e-06,
1880
+ "loss": 2.0232,
1881
+ "step": 2480
1882
+ },
1883
+ {
1884
+ "epoch": 0.4231815091774303,
1885
+ "grad_norm": 5.400667667388916,
1886
+ "learning_rate": 7.153559522573959e-06,
1887
+ "loss": 1.9549,
1888
+ "step": 2490
1889
+ },
1890
+ {
1891
+ "epoch": 0.424881033310673,
1892
+ "grad_norm": 6.810690402984619,
1893
+ "learning_rate": 7.126748835263528e-06,
1894
+ "loss": 1.9216,
1895
+ "step": 2500
1896
+ },
1897
+ {
1898
+ "epoch": 0.424881033310673,
1899
+ "eval_cosine_accuracy@1": 0.706,
1900
+ "eval_cosine_accuracy@10": 0.912,
1901
+ "eval_cosine_accuracy@3": 0.8375,
1902
+ "eval_cosine_accuracy@5": 0.87,
1903
+ "eval_cosine_map@100": 0.782229618428024,
1904
+ "eval_cosine_mrr@10": 0.7788738095238087,
1905
+ "eval_cosine_ndcg@10": 0.811319534720964,
1906
+ "eval_cosine_precision@1": 0.706,
1907
+ "eval_cosine_precision@10": 0.09120000000000002,
1908
+ "eval_cosine_precision@3": 0.2791666666666666,
1909
+ "eval_cosine_precision@5": 0.174,
1910
+ "eval_cosine_recall@1": 0.706,
1911
+ "eval_cosine_recall@10": 0.912,
1912
+ "eval_cosine_recall@3": 0.8375,
1913
+ "eval_cosine_recall@5": 0.87,
1914
+ "eval_loss": 1.6223056316375732,
1915
+ "eval_runtime": 2.7895,
1916
+ "eval_samples_per_second": 270.299,
1917
+ "eval_sequential_score": 0.782229618428024,
1918
+ "eval_steps_per_second": 2.151,
1919
+ "eval_sts-dev_pearson_cosine": 0.7982554363739512,
1920
+ "eval_sts-dev_pearson_dot": 0.7952587919868024,
1921
+ "eval_sts-dev_pearson_euclidean": 0.7869234232815028,
1922
+ "eval_sts-dev_pearson_manhattan": 0.7872366700437463,
1923
+ "eval_sts-dev_pearson_max": 0.7982554363739512,
1924
+ "eval_sts-dev_spearman_cosine": 0.8022561650534226,
1925
+ "eval_sts-dev_spearman_dot": 0.7985926145853256,
1926
+ "eval_sts-dev_spearman_euclidean": 0.8008740888247353,
1927
+ "eval_sts-dev_spearman_manhattan": 0.801728465873034,
1928
+ "eval_sts-dev_spearman_max": 0.8022561650534226,
1929
+ "step": 2500
1930
+ },
1931
+ {
1932
+ "epoch": 0.4265805574439157,
1933
+ "grad_norm": 6.482539653778076,
1934
+ "learning_rate": 7.099863282277834e-06,
1935
+ "loss": 1.5548,
1936
+ "step": 2510
1937
+ },
1938
+ {
1939
+ "epoch": 0.4282800815771584,
1940
+ "grad_norm": 7.853833198547363,
1941
+ "learning_rate": 7.072903810040381e-06,
1942
+ "loss": 1.6629,
1943
+ "step": 2520
1944
+ },
1945
+ {
1946
+ "epoch": 0.4299796057104011,
1947
+ "grad_norm": 6.526813507080078,
1948
+ "learning_rate": 7.0458713675767665e-06,
1949
+ "loss": 1.9463,
1950
+ "step": 2530
1951
+ },
1952
+ {
1953
+ "epoch": 0.43167912984364376,
1954
+ "grad_norm": 9.038359642028809,
1955
+ "learning_rate": 7.018766906481288e-06,
1956
+ "loss": 2.16,
1957
+ "step": 2540
1958
+ },
1959
+ {
1960
+ "epoch": 0.43337865397688646,
1961
+ "grad_norm": 5.049062728881836,
1962
+ "learning_rate": 6.991591380883435e-06,
1963
+ "loss": 1.6674,
1964
+ "step": 2550
1965
+ },
1966
+ {
1967
+ "epoch": 0.43507817811012917,
1968
+ "grad_norm": 6.659489154815674,
1969
+ "learning_rate": 6.964345747414307e-06,
1970
+ "loss": 1.7055,
1971
+ "step": 2560
1972
+ },
1973
+ {
1974
+ "epoch": 0.43677770224337187,
1975
+ "grad_norm": 8.091437339782715,
1976
+ "learning_rate": 6.937030965172935e-06,
1977
+ "loss": 1.8062,
1978
+ "step": 2570
1979
+ },
1980
+ {
1981
+ "epoch": 0.43847722637661457,
1982
+ "grad_norm": 7.644534587860107,
1983
+ "learning_rate": 6.909647995692522e-06,
1984
+ "loss": 2.1972,
1985
+ "step": 2580
1986
+ },
1987
+ {
1988
+ "epoch": 0.4401767505098572,
1989
+ "grad_norm": 7.349899768829346,
1990
+ "learning_rate": 6.88219780290659e-06,
1991
+ "loss": 1.9069,
1992
+ "step": 2590
1993
+ },
1994
+ {
1995
+ "epoch": 0.4418762746430999,
1996
+ "grad_norm": 8.312362670898438,
1997
+ "learning_rate": 6.854681353115056e-06,
1998
+ "loss": 2.6629,
1999
+ "step": 2600
2000
+ },
2001
+ {
2002
+ "epoch": 0.4435757987763426,
2003
+ "grad_norm": 8.937472343444824,
2004
+ "learning_rate": 6.82709961495021e-06,
2005
+ "loss": 2.4573,
2006
+ "step": 2610
2007
+ },
2008
+ {
2009
+ "epoch": 0.4452753229095853,
2010
+ "grad_norm": 6.268832683563232,
2011
+ "learning_rate": 6.799453559342619e-06,
2012
+ "loss": 2.0894,
2013
+ "step": 2620
2014
+ },
2015
+ {
2016
+ "epoch": 0.446974847042828,
2017
+ "grad_norm": 9.547843933105469,
2018
+ "learning_rate": 6.771744159486947e-06,
2019
+ "loss": 1.5755,
2020
+ "step": 2630
2021
+ },
2022
+ {
2023
+ "epoch": 0.4486743711760707,
2024
+ "grad_norm": 8.299346923828125,
2025
+ "learning_rate": 6.7439723908077e-06,
2026
+ "loss": 2.3673,
2027
+ "step": 2640
2028
+ },
2029
+ {
2030
+ "epoch": 0.4503738953093134,
2031
+ "grad_norm": 6.921226978302002,
2032
+ "learning_rate": 6.716139230924891e-06,
2033
+ "loss": 1.8997,
2034
+ "step": 2650
2035
+ },
2036
+ {
2037
+ "epoch": 0.4520734194425561,
2038
+ "grad_norm": 6.874094486236572,
2039
+ "learning_rate": 6.6882456596196125e-06,
2040
+ "loss": 2.0955,
2041
+ "step": 2660
2042
+ },
2043
+ {
2044
+ "epoch": 0.4537729435757988,
2045
+ "grad_norm": 7.336355686187744,
2046
+ "learning_rate": 6.660292658799565e-06,
2047
+ "loss": 2.0553,
2048
+ "step": 2670
2049
+ },
2050
+ {
2051
+ "epoch": 0.4554724677090415,
2052
+ "grad_norm": 6.311689376831055,
2053
+ "learning_rate": 6.632281212464479e-06,
2054
+ "loss": 1.9258,
2055
+ "step": 2680
2056
+ },
2057
+ {
2058
+ "epoch": 0.4571719918422842,
2059
+ "grad_norm": 5.280473232269287,
2060
+ "learning_rate": 6.60421230667148e-06,
2061
+ "loss": 1.7009,
2062
+ "step": 2690
2063
+ },
2064
+ {
2065
+ "epoch": 0.45887151597552683,
2066
+ "grad_norm": 5.1102986335754395,
2067
+ "learning_rate": 6.5760869295003796e-06,
2068
+ "loss": 1.9853,
2069
+ "step": 2700
2070
+ },
2071
+ {
2072
+ "epoch": 0.46057104010876954,
2073
+ "grad_norm": 6.345133304595947,
2074
+ "learning_rate": 6.54790607101889e-06,
2075
+ "loss": 1.8553,
2076
+ "step": 2710
2077
+ },
2078
+ {
2079
+ "epoch": 0.46227056424201224,
2080
+ "grad_norm": 9.678619384765625,
2081
+ "learning_rate": 6.519670723247773e-06,
2082
+ "loss": 1.7589,
2083
+ "step": 2720
2084
+ },
2085
+ {
2086
+ "epoch": 0.46397008837525494,
2087
+ "grad_norm": 4.833879470825195,
2088
+ "learning_rate": 6.491381880125916e-06,
2089
+ "loss": 1.827,
2090
+ "step": 2730
2091
+ },
2092
+ {
2093
+ "epoch": 0.46566961250849764,
2094
+ "grad_norm": 6.266608238220215,
2095
+ "learning_rate": 6.463040537475354e-06,
2096
+ "loss": 1.7476,
2097
+ "step": 2740
2098
+ },
2099
+ {
2100
+ "epoch": 0.4673691366417403,
2101
+ "grad_norm": 8.750253677368164,
2102
+ "learning_rate": 6.434647692966199e-06,
2103
+ "loss": 2.1204,
2104
+ "step": 2750
2105
+ },
2106
+ {
2107
+ "epoch": 0.469068660774983,
2108
+ "grad_norm": 5.197884559631348,
2109
+ "learning_rate": 6.406204346081531e-06,
2110
+ "loss": 1.5477,
2111
+ "step": 2760
2112
+ },
2113
+ {
2114
+ "epoch": 0.4707681849082257,
2115
+ "grad_norm": 7.584977626800537,
2116
+ "learning_rate": 6.377711498082213e-06,
2117
+ "loss": 1.9078,
2118
+ "step": 2770
2119
+ },
2120
+ {
2121
+ "epoch": 0.4724677090414684,
2122
+ "grad_norm": 5.226884365081787,
2123
+ "learning_rate": 6.349170151971641e-06,
2124
+ "loss": 1.53,
2125
+ "step": 2780
2126
+ },
2127
+ {
2128
+ "epoch": 0.4741672331747111,
2129
+ "grad_norm": 9.499238967895508,
2130
+ "learning_rate": 6.320581312460439e-06,
2131
+ "loss": 1.7868,
2132
+ "step": 2790
2133
+ },
2134
+ {
2135
+ "epoch": 0.47586675730795375,
2136
+ "grad_norm": 6.44546365737915,
2137
+ "learning_rate": 6.291945985931093e-06,
2138
+ "loss": 1.7781,
2139
+ "step": 2800
2140
+ },
2141
+ {
2142
+ "epoch": 0.47756628144119645,
2143
+ "grad_norm": 5.152192115783691,
2144
+ "learning_rate": 6.263265180402517e-06,
2145
+ "loss": 2.0043,
2146
+ "step": 2810
2147
+ },
2148
+ {
2149
+ "epoch": 0.47926580557443915,
2150
+ "grad_norm": 7.129688262939453,
2151
+ "learning_rate": 6.234539905494576e-06,
2152
+ "loss": 1.9584,
2153
+ "step": 2820
2154
+ },
2155
+ {
2156
+ "epoch": 0.48096532970768185,
2157
+ "grad_norm": 6.289784908294678,
2158
+ "learning_rate": 6.2057711723925455e-06,
2159
+ "loss": 1.9584,
2160
+ "step": 2830
2161
+ },
2162
+ {
2163
+ "epoch": 0.48266485384092456,
2164
+ "grad_norm": 7.36097526550293,
2165
+ "learning_rate": 6.176959993811511e-06,
2166
+ "loss": 1.9952,
2167
+ "step": 2840
2168
+ },
2169
+ {
2170
+ "epoch": 0.48436437797416726,
2171
+ "grad_norm": 4.622279644012451,
2172
+ "learning_rate": 6.148107383960723e-06,
2173
+ "loss": 1.9764,
2174
+ "step": 2850
2175
+ },
2176
+ {
2177
+ "epoch": 0.4860639021074099,
2178
+ "grad_norm": 8.651782989501953,
2179
+ "learning_rate": 6.119214358507889e-06,
2180
+ "loss": 1.8804,
2181
+ "step": 2860
2182
+ },
2183
+ {
2184
+ "epoch": 0.4877634262406526,
2185
+ "grad_norm": 6.295717716217041,
2186
+ "learning_rate": 6.090281934543429e-06,
2187
+ "loss": 1.8634,
2188
+ "step": 2870
2189
+ },
2190
+ {
2191
+ "epoch": 0.4894629503738953,
2192
+ "grad_norm": 5.505663871765137,
2193
+ "learning_rate": 6.061311130544664e-06,
2194
+ "loss": 1.8853,
2195
+ "step": 2880
2196
+ },
2197
+ {
2198
+ "epoch": 0.491162474507138,
2199
+ "grad_norm": 10.88358211517334,
2200
+ "learning_rate": 6.032302966339965e-06,
2201
+ "loss": 2.2094,
2202
+ "step": 2890
2203
+ },
2204
+ {
2205
+ "epoch": 0.4928619986403807,
2206
+ "grad_norm": 6.576531410217285,
2207
+ "learning_rate": 6.003258463072859e-06,
2208
+ "loss": 1.5955,
2209
+ "step": 2900
2210
+ },
2211
+ {
2212
+ "epoch": 0.49456152277362336,
2213
+ "grad_norm": 5.74630880355835,
2214
+ "learning_rate": 5.974178643166075e-06,
2215
+ "loss": 1.8756,
2216
+ "step": 2910
2217
+ },
2218
+ {
2219
+ "epoch": 0.49626104690686607,
2220
+ "grad_norm": 8.021421432495117,
2221
+ "learning_rate": 5.945064530285556e-06,
2222
+ "loss": 1.9114,
2223
+ "step": 2920
2224
+ },
2225
+ {
2226
+ "epoch": 0.49796057104010877,
2227
+ "grad_norm": 4.470407009124756,
2228
+ "learning_rate": 5.915917149304424e-06,
2229
+ "loss": 2.2359,
2230
+ "step": 2930
2231
+ },
2232
+ {
2233
+ "epoch": 0.49966009517335147,
2234
+ "grad_norm": 8.264505386352539,
2235
+ "learning_rate": 5.8867375262669076e-06,
2236
+ "loss": 1.7543,
2237
+ "step": 2940
2238
+ },
2239
+ {
2240
+ "epoch": 0.5013596193065941,
2241
+ "grad_norm": 6.8826069831848145,
2242
+ "learning_rate": 5.857526688352211e-06,
2243
+ "loss": 1.7754,
2244
+ "step": 2950
2245
+ },
2246
+ {
2247
+ "epoch": 0.5030591434398368,
2248
+ "grad_norm": 7.148299694061279,
2249
+ "learning_rate": 5.828285663838365e-06,
2250
+ "loss": 2.1194,
2251
+ "step": 2960
2252
+ },
2253
+ {
2254
+ "epoch": 0.5047586675730795,
2255
+ "grad_norm": 6.472208499908447,
2256
+ "learning_rate": 5.7990154820660295e-06,
2257
+ "loss": 1.7127,
2258
+ "step": 2970
2259
+ },
2260
+ {
2261
+ "epoch": 0.5064581917063222,
2262
+ "grad_norm": 8.591309547424316,
2263
+ "learning_rate": 5.769717173402253e-06,
2264
+ "loss": 1.6715,
2265
+ "step": 2980
2266
+ },
2267
+ {
2268
+ "epoch": 0.5081577158395649,
2269
+ "grad_norm": 5.60316801071167,
2270
+ "learning_rate": 5.740391769204209e-06,
2271
+ "loss": 1.4632,
2272
+ "step": 2990
2273
+ },
2274
+ {
2275
+ "epoch": 0.5098572399728076,
2276
+ "grad_norm": 5.79714822769165,
2277
+ "learning_rate": 5.71104030178288e-06,
2278
+ "loss": 1.7171,
2279
+ "step": 3000
2280
+ },
2281
+ {
2282
+ "epoch": 0.5098572399728076,
2283
+ "eval_cosine_accuracy@1": 0.707,
2284
+ "eval_cosine_accuracy@10": 0.915,
2285
+ "eval_cosine_accuracy@3": 0.839,
2286
+ "eval_cosine_accuracy@5": 0.874,
2287
+ "eval_cosine_map@100": 0.7842834572194356,
2288
+ "eval_cosine_mrr@10": 0.7809134920634915,
2289
+ "eval_cosine_ndcg@10": 0.8136383995155022,
2290
+ "eval_cosine_precision@1": 0.707,
2291
+ "eval_cosine_precision@10": 0.0915,
2292
+ "eval_cosine_precision@3": 0.2796666666666666,
2293
+ "eval_cosine_precision@5": 0.1748,
2294
+ "eval_cosine_recall@1": 0.707,
2295
+ "eval_cosine_recall@10": 0.915,
2296
+ "eval_cosine_recall@3": 0.839,
2297
+ "eval_cosine_recall@5": 0.874,
2298
+ "eval_loss": 1.6132309436798096,
2299
+ "eval_runtime": 2.7954,
2300
+ "eval_samples_per_second": 269.727,
2301
+ "eval_sequential_score": 0.7842834572194356,
2302
+ "eval_steps_per_second": 2.146,
2303
+ "eval_sts-dev_pearson_cosine": 0.8056038655154354,
2304
+ "eval_sts-dev_pearson_dot": 0.8019913183334593,
2305
+ "eval_sts-dev_pearson_euclidean": 0.7922504417572196,
2306
+ "eval_sts-dev_pearson_manhattan": 0.7937333279050955,
2307
+ "eval_sts-dev_pearson_max": 0.8056038655154354,
2308
+ "eval_sts-dev_spearman_cosine": 0.8083570013976158,
2309
+ "eval_sts-dev_spearman_dot": 0.8047381961082496,
2310
+ "eval_sts-dev_spearman_euclidean": 0.8071350345841625,
2311
+ "eval_sts-dev_spearman_manhattan": 0.8069596988802814,
2312
+ "eval_sts-dev_spearman_max": 0.8083570013976158,
2313
+ "step": 3000
2314
+ },
2315
+ {
2316
+ "epoch": 0.5115567641060503,
2317
+ "grad_norm": 6.9394049644470215,
2318
+ "learning_rate": 5.6816638043667294e-06,
2319
+ "loss": 1.4526,
2320
+ "step": 3010
2321
+ },
2322
+ {
2323
+ "epoch": 0.513256288239293,
2324
+ "grad_norm": 10.348281860351562,
2325
+ "learning_rate": 5.652263311065325e-06,
2326
+ "loss": 1.8309,
2327
+ "step": 3020
2328
+ },
2329
+ {
2330
+ "epoch": 0.5149558123725357,
2331
+ "grad_norm": 8.404902458190918,
2332
+ "learning_rate": 5.6228398568329344e-06,
2333
+ "loss": 1.9336,
2334
+ "step": 3030
2335
+ },
2336
+ {
2337
+ "epoch": 0.5166553365057783,
2338
+ "grad_norm": 6.444024562835693,
2339
+ "learning_rate": 5.593394477432096e-06,
2340
+ "loss": 1.7704,
2341
+ "step": 3040
2342
+ },
2343
+ {
2344
+ "epoch": 0.518354860639021,
2345
+ "grad_norm": 9.629279136657715,
2346
+ "learning_rate": 5.563928209397151e-06,
2347
+ "loss": 1.943,
2348
+ "step": 3050
2349
+ },
2350
+ {
2351
+ "epoch": 0.5200543847722637,
2352
+ "grad_norm": 6.889398097991943,
2353
+ "learning_rate": 5.534442089997771e-06,
2354
+ "loss": 1.8689,
2355
+ "step": 3060
2356
+ },
2357
+ {
2358
+ "epoch": 0.5217539089055064,
2359
+ "grad_norm": 7.757649898529053,
2360
+ "learning_rate": 5.5049371572024235e-06,
2361
+ "loss": 1.8908,
2362
+ "step": 3070
2363
+ },
2364
+ {
2365
+ "epoch": 0.5234534330387491,
2366
+ "grad_norm": 6.982544422149658,
2367
+ "learning_rate": 5.475414449641852e-06,
2368
+ "loss": 1.7924,
2369
+ "step": 3080
2370
+ },
2371
+ {
2372
+ "epoch": 0.5251529571719918,
2373
+ "grad_norm": 7.109350204467773,
2374
+ "learning_rate": 5.445875006572502e-06,
2375
+ "loss": 1.7891,
2376
+ "step": 3090
2377
+ },
2378
+ {
2379
+ "epoch": 0.5268524813052345,
2380
+ "grad_norm": 7.507669448852539,
2381
+ "learning_rate": 5.416319867839943e-06,
2382
+ "loss": 2.0208,
2383
+ "step": 3100
2384
+ },
2385
+ {
2386
+ "epoch": 0.5285520054384772,
2387
+ "grad_norm": 4.527754783630371,
2388
+ "learning_rate": 5.386750073842259e-06,
2389
+ "loss": 1.5862,
2390
+ "step": 3110
2391
+ },
2392
+ {
2393
+ "epoch": 0.53025152957172,
2394
+ "grad_norm": 8.16139030456543,
2395
+ "learning_rate": 5.357166665493434e-06,
2396
+ "loss": 2.2195,
2397
+ "step": 3120
2398
+ },
2399
+ {
2400
+ "epoch": 0.5319510537049627,
2401
+ "grad_norm": 7.385548114776611,
2402
+ "learning_rate": 5.327570684186697e-06,
2403
+ "loss": 1.6103,
2404
+ "step": 3130
2405
+ },
2406
+ {
2407
+ "epoch": 0.5336505778382054,
2408
+ "grad_norm": 7.784885406494141,
2409
+ "learning_rate": 5.297963171757874e-06,
2410
+ "loss": 2.5839,
2411
+ "step": 3140
2412
+ },
2413
+ {
2414
+ "epoch": 0.535350101971448,
2415
+ "grad_norm": 7.986569404602051,
2416
+ "learning_rate": 5.268345170448704e-06,
2417
+ "loss": 2.4304,
2418
+ "step": 3150
2419
+ },
2420
+ {
2421
+ "epoch": 0.5370496261046906,
2422
+ "grad_norm": 7.29852294921875,
2423
+ "learning_rate": 5.23871772287016e-06,
2424
+ "loss": 1.9009,
2425
+ "step": 3160
2426
+ },
2427
+ {
2428
+ "epoch": 0.5387491502379333,
2429
+ "grad_norm": 8.884086608886719,
2430
+ "learning_rate": 5.209081871965741e-06,
2431
+ "loss": 2.2161,
2432
+ "step": 3170
2433
+ },
2434
+ {
2435
+ "epoch": 0.540448674371176,
2436
+ "grad_norm": 6.930916786193848,
2437
+ "learning_rate": 5.179438660974756e-06,
2438
+ "loss": 1.9054,
2439
+ "step": 3180
2440
+ },
2441
+ {
2442
+ "epoch": 0.5421481985044188,
2443
+ "grad_norm": 6.724717617034912,
2444
+ "learning_rate": 5.149789133395606e-06,
2445
+ "loss": 1.3603,
2446
+ "step": 3190
2447
+ },
2448
+ {
2449
+ "epoch": 0.5438477226376615,
2450
+ "grad_norm": 11.375778198242188,
2451
+ "learning_rate": 5.120134332949049e-06,
2452
+ "loss": 2.1651,
2453
+ "step": 3200
2454
+ },
2455
+ {
2456
+ "epoch": 0.5455472467709042,
2457
+ "grad_norm": 9.330193519592285,
2458
+ "learning_rate": 5.0904753035414545e-06,
2459
+ "loss": 2.0636,
2460
+ "step": 3210
2461
+ },
2462
+ {
2463
+ "epoch": 0.5472467709041469,
2464
+ "grad_norm": 7.2718000411987305,
2465
+ "learning_rate": 5.060813089228064e-06,
2466
+ "loss": 2.0036,
2467
+ "step": 3220
2468
+ },
2469
+ {
2470
+ "epoch": 0.5489462950373896,
2471
+ "grad_norm": 5.489546775817871,
2472
+ "learning_rate": 5.0311487341762285e-06,
2473
+ "loss": 1.7724,
2474
+ "step": 3230
2475
+ },
2476
+ {
2477
+ "epoch": 0.5506458191706323,
2478
+ "grad_norm": 7.590505123138428,
2479
+ "learning_rate": 5.001483282628664e-06,
2480
+ "loss": 1.2798,
2481
+ "step": 3240
2482
+ },
2483
+ {
2484
+ "epoch": 0.552345343303875,
2485
+ "grad_norm": 5.193188190460205,
2486
+ "learning_rate": 4.971817778866681e-06,
2487
+ "loss": 1.8975,
2488
+ "step": 3250
2489
+ },
2490
+ {
2491
+ "epoch": 0.5540448674371176,
2492
+ "grad_norm": 4.518863677978516,
2493
+ "learning_rate": 4.9421532671734265e-06,
2494
+ "loss": 1.8153,
2495
+ "step": 3260
2496
+ },
2497
+ {
2498
+ "epoch": 0.5557443915703603,
2499
+ "grad_norm": 7.619812488555908,
2500
+ "learning_rate": 4.912490791797129e-06,
2501
+ "loss": 1.6361,
2502
+ "step": 3270
2503
+ },
2504
+ {
2505
+ "epoch": 0.557443915703603,
2506
+ "grad_norm": 8.381867408752441,
2507
+ "learning_rate": 4.882831396914332e-06,
2508
+ "loss": 2.2208,
2509
+ "step": 3280
2510
+ },
2511
+ {
2512
+ "epoch": 0.5591434398368457,
2513
+ "grad_norm": 4.878629684448242,
2514
+ "learning_rate": 4.853176126593139e-06,
2515
+ "loss": 1.7356,
2516
+ "step": 3290
2517
+ },
2518
+ {
2519
+ "epoch": 0.5608429639700884,
2520
+ "grad_norm": 6.435664653778076,
2521
+ "learning_rate": 4.823526024756464e-06,
2522
+ "loss": 1.8563,
2523
+ "step": 3300
2524
+ },
2525
+ {
2526
+ "epoch": 0.5625424881033311,
2527
+ "grad_norm": 6.569293022155762,
2528
+ "learning_rate": 4.793882135145278e-06,
2529
+ "loss": 1.8743,
2530
+ "step": 3310
2531
+ },
2532
+ {
2533
+ "epoch": 0.5642420122365738,
2534
+ "grad_norm": 6.728095054626465,
2535
+ "learning_rate": 4.764245501281869e-06,
2536
+ "loss": 1.7494,
2537
+ "step": 3320
2538
+ },
2539
+ {
2540
+ "epoch": 0.5659415363698165,
2541
+ "grad_norm": 6.320137977600098,
2542
+ "learning_rate": 4.734617166433113e-06,
2543
+ "loss": 1.5363,
2544
+ "step": 3330
2545
+ },
2546
+ {
2547
+ "epoch": 0.5676410605030592,
2548
+ "grad_norm": 5.589328765869141,
2549
+ "learning_rate": 4.7049981735737385e-06,
2550
+ "loss": 2.0732,
2551
+ "step": 3340
2552
+ },
2553
+ {
2554
+ "epoch": 0.5693405846363019,
2555
+ "grad_norm": 6.616926193237305,
2556
+ "learning_rate": 4.67538956534962e-06,
2557
+ "loss": 1.7303,
2558
+ "step": 3350
2559
+ },
2560
+ {
2561
+ "epoch": 0.5710401087695445,
2562
+ "grad_norm": 4.177309036254883,
2563
+ "learning_rate": 4.645792384041078e-06,
2564
+ "loss": 1.532,
2565
+ "step": 3360
2566
+ },
2567
+ {
2568
+ "epoch": 0.5727396329027872,
2569
+ "grad_norm": 6.281280517578125,
2570
+ "learning_rate": 4.616207671526175e-06,
2571
+ "loss": 2.2504,
2572
+ "step": 3370
2573
+ },
2574
+ {
2575
+ "epoch": 0.5744391570360299,
2576
+ "grad_norm": 6.781736373901367,
2577
+ "learning_rate": 4.586636469244054e-06,
2578
+ "loss": 1.6377,
2579
+ "step": 3380
2580
+ },
2581
+ {
2582
+ "epoch": 0.5761386811692726,
2583
+ "grad_norm": 8.028448104858398,
2584
+ "learning_rate": 4.557079818158269e-06,
2585
+ "loss": 2.1332,
2586
+ "step": 3390
2587
+ },
2588
+ {
2589
+ "epoch": 0.5778382053025153,
2590
+ "grad_norm": 5.485851764678955,
2591
+ "learning_rate": 4.527538758720144e-06,
2592
+ "loss": 1.8658,
2593
+ "step": 3400
2594
+ },
2595
+ {
2596
+ "epoch": 0.579537729435758,
2597
+ "grad_norm": 5.404677867889404,
2598
+ "learning_rate": 4.498014330832148e-06,
2599
+ "loss": 1.8165,
2600
+ "step": 3410
2601
+ },
2602
+ {
2603
+ "epoch": 0.5812372535690007,
2604
+ "grad_norm": 5.938612461090088,
2605
+ "learning_rate": 4.468507573811285e-06,
2606
+ "loss": 1.7847,
2607
+ "step": 3420
2608
+ },
2609
+ {
2610
+ "epoch": 0.5829367777022434,
2611
+ "grad_norm": 6.639806270599365,
2612
+ "learning_rate": 4.439019526352511e-06,
2613
+ "loss": 1.747,
2614
+ "step": 3430
2615
+ },
2616
+ {
2617
+ "epoch": 0.5846363018354861,
2618
+ "grad_norm": 10.548473358154297,
2619
+ "learning_rate": 4.409551226492173e-06,
2620
+ "loss": 1.8863,
2621
+ "step": 3440
2622
+ },
2623
+ {
2624
+ "epoch": 0.5863358259687288,
2625
+ "grad_norm": 6.586738109588623,
2626
+ "learning_rate": 4.380103711571459e-06,
2627
+ "loss": 2.0465,
2628
+ "step": 3450
2629
+ },
2630
+ {
2631
+ "epoch": 0.5880353501019715,
2632
+ "grad_norm": 6.210246562957764,
2633
+ "learning_rate": 4.350678018199887e-06,
2634
+ "loss": 1.6285,
2635
+ "step": 3460
2636
+ },
2637
+ {
2638
+ "epoch": 0.5897348742352141,
2639
+ "grad_norm": 8.666604995727539,
2640
+ "learning_rate": 4.321275182218821e-06,
2641
+ "loss": 2.278,
2642
+ "step": 3470
2643
+ },
2644
+ {
2645
+ "epoch": 0.5914343983684568,
2646
+ "grad_norm": 5.970696926116943,
2647
+ "learning_rate": 4.291896238664991e-06,
2648
+ "loss": 1.7391,
2649
+ "step": 3480
2650
+ },
2651
+ {
2652
+ "epoch": 0.5931339225016995,
2653
+ "grad_norm": 6.197153091430664,
2654
+ "learning_rate": 4.262542221734077e-06,
2655
+ "loss": 1.8116,
2656
+ "step": 3490
2657
+ },
2658
+ {
2659
+ "epoch": 0.5948334466349422,
2660
+ "grad_norm": 5.297378063201904,
2661
+ "learning_rate": 4.233214164744286e-06,
2662
+ "loss": 1.5104,
2663
+ "step": 3500
2664
+ },
2665
+ {
2666
+ "epoch": 0.5948334466349422,
2667
+ "eval_cosine_accuracy@1": 0.7,
2668
+ "eval_cosine_accuracy@10": 0.9105,
2669
+ "eval_cosine_accuracy@3": 0.8315,
2670
+ "eval_cosine_accuracy@5": 0.871,
2671
+ "eval_cosine_map@100": 0.7789008444240532,
2672
+ "eval_cosine_mrr@10": 0.775367857142857,
2673
+ "eval_cosine_ndcg@10": 0.8083837978692977,
2674
+ "eval_cosine_precision@1": 0.7,
2675
+ "eval_cosine_precision@10": 0.09105,
2676
+ "eval_cosine_precision@3": 0.2771666666666666,
2677
+ "eval_cosine_precision@5": 0.17420000000000005,
2678
+ "eval_cosine_recall@1": 0.7,
2679
+ "eval_cosine_recall@10": 0.9105,
2680
+ "eval_cosine_recall@3": 0.8315,
2681
+ "eval_cosine_recall@5": 0.871,
2682
+ "eval_loss": 1.5698647499084473,
2683
+ "eval_runtime": 2.7632,
2684
+ "eval_samples_per_second": 272.875,
2685
+ "eval_sequential_score": 0.7789008444240532,
2686
+ "eval_steps_per_second": 2.171,
2687
+ "eval_sts-dev_pearson_cosine": 0.8017937499678631,
2688
+ "eval_sts-dev_pearson_dot": 0.7968971369444564,
2689
+ "eval_sts-dev_pearson_euclidean": 0.7889399312152248,
2690
+ "eval_sts-dev_pearson_manhattan": 0.7898347176162472,
2691
+ "eval_sts-dev_pearson_max": 0.8017937499678631,
2692
+ "eval_sts-dev_spearman_cosine": 0.8044304044501694,
2693
+ "eval_sts-dev_spearman_dot": 0.8013449184419915,
2694
+ "eval_sts-dev_spearman_euclidean": 0.8042950575910329,
2695
+ "eval_sts-dev_spearman_manhattan": 0.8042523771894302,
2696
+ "eval_sts-dev_spearman_max": 0.8044304044501694,
2697
+ "step": 3500
2698
+ },
2699
+ {
2700
+ "epoch": 0.5965329707681849,
2701
+ "grad_norm": 5.6484575271606445,
2702
+ "learning_rate": 4.2039131000999865e-06,
2703
+ "loss": 1.8306,
2704
+ "step": 3510
2705
+ },
2706
+ {
2707
+ "epoch": 0.5982324949014276,
2708
+ "grad_norm": 7.109194278717041,
2709
+ "learning_rate": 4.174640059255369e-06,
2710
+ "loss": 1.6575,
2711
+ "step": 3520
2712
+ },
2713
+ {
2714
+ "epoch": 0.5999320190346703,
2715
+ "grad_norm": 8.67902660369873,
2716
+ "learning_rate": 4.145396072678126e-06,
2717
+ "loss": 1.56,
2718
+ "step": 3530
2719
+ },
2720
+ {
2721
+ "epoch": 0.601631543167913,
2722
+ "grad_norm": 5.615638732910156,
2723
+ "learning_rate": 4.116182169813185e-06,
2724
+ "loss": 1.8674,
2725
+ "step": 3540
2726
+ },
2727
+ {
2728
+ "epoch": 0.6033310673011557,
2729
+ "grad_norm": 6.5344085693359375,
2730
+ "learning_rate": 4.086999379046472e-06,
2731
+ "loss": 1.4909,
2732
+ "step": 3550
2733
+ },
2734
+ {
2735
+ "epoch": 0.6050305914343984,
2736
+ "grad_norm": 7.410928726196289,
2737
+ "learning_rate": 4.057848727668701e-06,
2738
+ "loss": 1.6653,
2739
+ "step": 3560
2740
+ },
2741
+ {
2742
+ "epoch": 0.6067301155676411,
2743
+ "grad_norm": 5.162845134735107,
2744
+ "learning_rate": 4.028731241839224e-06,
2745
+ "loss": 2.2836,
2746
+ "step": 3570
2747
+ },
2748
+ {
2749
+ "epoch": 0.6084296397008837,
2750
+ "grad_norm": 9.384202003479004,
2751
+ "learning_rate": 3.999647946549898e-06,
2752
+ "loss": 2.1572,
2753
+ "step": 3580
2754
+ },
2755
+ {
2756
+ "epoch": 0.6101291638341264,
2757
+ "grad_norm": 9.560714721679688,
2758
+ "learning_rate": 3.970599865589006e-06,
2759
+ "loss": 1.6969,
2760
+ "step": 3590
2761
+ },
2762
+ {
2763
+ "epoch": 0.6118286879673691,
2764
+ "grad_norm": 8.857442855834961,
2765
+ "learning_rate": 3.94158802150522e-06,
2766
+ "loss": 2.0679,
2767
+ "step": 3600
2768
+ },
2769
+ {
2770
+ "epoch": 0.6135282121006118,
2771
+ "grad_norm": 4.5523200035095215,
2772
+ "learning_rate": 3.912613435571603e-06,
2773
+ "loss": 1.8555,
2774
+ "step": 3610
2775
+ },
2776
+ {
2777
+ "epoch": 0.6152277362338545,
2778
+ "grad_norm": 4.791132926940918,
2779
+ "learning_rate": 3.8836771277496575e-06,
2780
+ "loss": 1.8506,
2781
+ "step": 3620
2782
+ },
2783
+ {
2784
+ "epoch": 0.6169272603670972,
2785
+ "grad_norm": 5.0394673347473145,
2786
+ "learning_rate": 3.854780116653429e-06,
2787
+ "loss": 1.4441,
2788
+ "step": 3630
2789
+ },
2790
+ {
2791
+ "epoch": 0.6186267845003399,
2792
+ "grad_norm": 8.9247465133667,
2793
+ "learning_rate": 3.8259234195136344e-06,
2794
+ "loss": 1.6772,
2795
+ "step": 3640
2796
+ },
2797
+ {
2798
+ "epoch": 0.6203263086335826,
2799
+ "grad_norm": 7.963625907897949,
2800
+ "learning_rate": 3.7971080521418636e-06,
2801
+ "loss": 1.6228,
2802
+ "step": 3650
2803
+ },
2804
+ {
2805
+ "epoch": 0.6220258327668253,
2806
+ "grad_norm": 5.572691917419434,
2807
+ "learning_rate": 3.7683350288948204e-06,
2808
+ "loss": 1.7809,
2809
+ "step": 3660
2810
+ },
2811
+ {
2812
+ "epoch": 0.623725356900068,
2813
+ "grad_norm": 4.029000282287598,
2814
+ "learning_rate": 3.7396053626386115e-06,
2815
+ "loss": 1.5777,
2816
+ "step": 3670
2817
+ },
2818
+ {
2819
+ "epoch": 0.6254248810333106,
2820
+ "grad_norm": 5.965753555297852,
2821
+ "learning_rate": 3.7109200647130983e-06,
2822
+ "loss": 2.0558,
2823
+ "step": 3680
2824
+ },
2825
+ {
2826
+ "epoch": 0.6271244051665533,
2827
+ "grad_norm": 5.478359222412109,
2828
+ "learning_rate": 3.6822801448962835e-06,
2829
+ "loss": 1.9044,
2830
+ "step": 3690
2831
+ },
2832
+ {
2833
+ "epoch": 0.628823929299796,
2834
+ "grad_norm": 6.156702518463135,
2835
+ "learning_rate": 3.6536866113687754e-06,
2836
+ "loss": 1.9202,
2837
+ "step": 3700
2838
+ },
2839
+ {
2840
+ "epoch": 0.6305234534330387,
2841
+ "grad_norm": 5.119917392730713,
2842
+ "learning_rate": 3.6251404706783005e-06,
2843
+ "loss": 1.4821,
2844
+ "step": 3710
2845
+ },
2846
+ {
2847
+ "epoch": 0.6322229775662814,
2848
+ "grad_norm": 5.908693790435791,
2849
+ "learning_rate": 3.5966427277042593e-06,
2850
+ "loss": 1.5492,
2851
+ "step": 3720
2852
+ },
2853
+ {
2854
+ "epoch": 0.6339225016995241,
2855
+ "grad_norm": 6.6029181480407715,
2856
+ "learning_rate": 3.568194385622359e-06,
2857
+ "loss": 1.7337,
2858
+ "step": 3730
2859
+ },
2860
+ {
2861
+ "epoch": 0.6356220258327668,
2862
+ "grad_norm": 6.767298221588135,
2863
+ "learning_rate": 3.5397964458693045e-06,
2864
+ "loss": 2.0108,
2865
+ "step": 3740
2866
+ },
2867
+ {
2868
+ "epoch": 0.6373215499660095,
2869
+ "grad_norm": 7.079708576202393,
2870
+ "learning_rate": 3.5114499081075386e-06,
2871
+ "loss": 1.7365,
2872
+ "step": 3750
2873
+ },
2874
+ {
2875
+ "epoch": 0.6390210740992522,
2876
+ "grad_norm": 5.657531261444092,
2877
+ "learning_rate": 3.4831557701900507e-06,
2878
+ "loss": 1.5291,
2879
+ "step": 3760
2880
+ },
2881
+ {
2882
+ "epoch": 0.6407205982324949,
2883
+ "grad_norm": 9.403407096862793,
2884
+ "learning_rate": 3.4549150281252635e-06,
2885
+ "loss": 1.8147,
2886
+ "step": 3770
2887
+ },
2888
+ {
2889
+ "epoch": 0.6424201223657376,
2890
+ "grad_norm": 4.905513763427734,
2891
+ "learning_rate": 3.4267286760419527e-06,
2892
+ "loss": 1.6704,
2893
+ "step": 3780
2894
+ },
2895
+ {
2896
+ "epoch": 0.6441196464989802,
2897
+ "grad_norm": 5.188987731933594,
2898
+ "learning_rate": 3.398597706154268e-06,
2899
+ "loss": 1.727,
2900
+ "step": 3790
2901
+ },
2902
+ {
2903
+ "epoch": 0.6458191706322229,
2904
+ "grad_norm": 9.689862251281738,
2905
+ "learning_rate": 3.3705231087267955e-06,
2906
+ "loss": 1.741,
2907
+ "step": 3800
2908
+ },
2909
+ {
2910
+ "epoch": 0.6475186947654656,
2911
+ "grad_norm": 5.145129203796387,
2912
+ "learning_rate": 3.3425058720396986e-06,
2913
+ "loss": 1.6929,
2914
+ "step": 3810
2915
+ },
2916
+ {
2917
+ "epoch": 0.6492182188987083,
2918
+ "grad_norm": 5.635613918304443,
2919
+ "learning_rate": 3.314546982353941e-06,
2920
+ "loss": 1.7855,
2921
+ "step": 3820
2922
+ },
2923
+ {
2924
+ "epoch": 0.650917743031951,
2925
+ "grad_norm": 5.333996295928955,
2926
+ "learning_rate": 3.2866474238765522e-06,
2927
+ "loss": 1.4379,
2928
+ "step": 3830
2929
+ },
2930
+ {
2931
+ "epoch": 0.6526172671651937,
2932
+ "grad_norm": 7.929337501525879,
2933
+ "learning_rate": 3.258808178725984e-06,
2934
+ "loss": 1.8255,
2935
+ "step": 3840
2936
+ },
2937
+ {
2938
+ "epoch": 0.6543167912984365,
2939
+ "grad_norm": 7.606388092041016,
2940
+ "learning_rate": 3.2310302268975545e-06,
2941
+ "loss": 1.8072,
2942
+ "step": 3850
2943
+ },
2944
+ {
2945
+ "epoch": 0.6560163154316792,
2946
+ "grad_norm": 5.5722126960754395,
2947
+ "learning_rate": 3.2033145462289284e-06,
2948
+ "loss": 1.4806,
2949
+ "step": 3860
2950
+ },
2951
+ {
2952
+ "epoch": 0.6577158395649219,
2953
+ "grad_norm": 4.687691688537598,
2954
+ "learning_rate": 3.175662112365707e-06,
2955
+ "loss": 1.7177,
2956
+ "step": 3870
2957
+ },
2958
+ {
2959
+ "epoch": 0.6594153636981646,
2960
+ "grad_norm": 5.950764179229736,
2961
+ "learning_rate": 3.1480738987270854e-06,
2962
+ "loss": 1.3493,
2963
+ "step": 3880
2964
+ },
2965
+ {
2966
+ "epoch": 0.6611148878314073,
2967
+ "grad_norm": 6.248993396759033,
2968
+ "learning_rate": 3.120550876471576e-06,
2969
+ "loss": 1.7455,
2970
+ "step": 3890
2971
+ },
2972
+ {
2973
+ "epoch": 0.6628144119646499,
2974
+ "grad_norm": 6.485055446624756,
2975
+ "learning_rate": 3.0930940144628385e-06,
2976
+ "loss": 1.5822,
2977
+ "step": 3900
2978
+ },
2979
+ {
2980
+ "epoch": 0.6645139360978926,
2981
+ "grad_norm": 6.7886881828308105,
2982
+ "learning_rate": 3.0657042792355528e-06,
2983
+ "loss": 2.4897,
2984
+ "step": 3910
2985
+ },
2986
+ {
2987
+ "epoch": 0.6662134602311353,
2988
+ "grad_norm": 4.51090669631958,
2989
+ "learning_rate": 3.0383826349614115e-06,
2990
+ "loss": 1.8648,
2991
+ "step": 3920
2992
+ },
2993
+ {
2994
+ "epoch": 0.667912984364378,
2995
+ "grad_norm": 7.129496097564697,
2996
+ "learning_rate": 3.0111300434151746e-06,
2997
+ "loss": 1.8095,
2998
+ "step": 3930
2999
+ },
3000
+ {
3001
+ "epoch": 0.6696125084976207,
3002
+ "grad_norm": 8.079377174377441,
3003
+ "learning_rate": 2.9839474639408108e-06,
3004
+ "loss": 2.0793,
3005
+ "step": 3940
3006
+ },
3007
+ {
3008
+ "epoch": 0.6713120326308634,
3009
+ "grad_norm": 6.9486284255981445,
3010
+ "learning_rate": 2.956835853417726e-06,
3011
+ "loss": 1.9273,
3012
+ "step": 3950
3013
+ },
3014
+ {
3015
+ "epoch": 0.6730115567641061,
3016
+ "grad_norm": 6.231419563293457,
3017
+ "learning_rate": 2.9297961662270893e-06,
3018
+ "loss": 1.6705,
3019
+ "step": 3960
3020
+ },
3021
+ {
3022
+ "epoch": 0.6747110808973488,
3023
+ "grad_norm": 9.692667007446289,
3024
+ "learning_rate": 2.902829354218219e-06,
3025
+ "loss": 1.8497,
3026
+ "step": 3970
3027
+ },
3028
+ {
3029
+ "epoch": 0.6764106050305915,
3030
+ "grad_norm": 6.941633701324463,
3031
+ "learning_rate": 2.875936366675096e-06,
3032
+ "loss": 2.1609,
3033
+ "step": 3980
3034
+ },
3035
+ {
3036
+ "epoch": 0.6781101291638342,
3037
+ "grad_norm": 4.512592315673828,
3038
+ "learning_rate": 2.8491181502829335e-06,
3039
+ "loss": 1.719,
3040
+ "step": 3990
3041
+ },
3042
+ {
3043
+ "epoch": 0.6798096532970768,
3044
+ "grad_norm": 13.518653869628906,
3045
+ "learning_rate": 2.822375649094846e-06,
3046
+ "loss": 1.7953,
3047
+ "step": 4000
3048
+ },
3049
+ {
3050
+ "epoch": 0.6798096532970768,
3051
+ "eval_cosine_accuracy@1": 0.7145,
3052
+ "eval_cosine_accuracy@10": 0.914,
3053
+ "eval_cosine_accuracy@3": 0.839,
3054
+ "eval_cosine_accuracy@5": 0.8745,
3055
+ "eval_cosine_map@100": 0.7881479093038395,
3056
+ "eval_cosine_mrr@10": 0.7848144841269835,
3057
+ "eval_cosine_ndcg@10": 0.8163072349117797,
3058
+ "eval_cosine_precision@1": 0.7145,
3059
+ "eval_cosine_precision@10": 0.09140000000000001,
3060
+ "eval_cosine_precision@3": 0.2796666666666666,
3061
+ "eval_cosine_precision@5": 0.1749,
3062
+ "eval_cosine_recall@1": 0.7145,
3063
+ "eval_cosine_recall@10": 0.914,
3064
+ "eval_cosine_recall@3": 0.839,
3065
+ "eval_cosine_recall@5": 0.8745,
3066
+ "eval_loss": 1.5573265552520752,
3067
+ "eval_runtime": 2.7757,
3068
+ "eval_samples_per_second": 271.645,
3069
+ "eval_sequential_score": 0.7881479093038395,
3070
+ "eval_steps_per_second": 2.162,
3071
+ "eval_sts-dev_pearson_cosine": 0.8053811920422538,
3072
+ "eval_sts-dev_pearson_dot": 0.8004660633078914,
3073
+ "eval_sts-dev_pearson_euclidean": 0.7928635065778744,
3074
+ "eval_sts-dev_pearson_manhattan": 0.7941416034985971,
3075
+ "eval_sts-dev_pearson_max": 0.8053811920422538,
3076
+ "eval_sts-dev_spearman_cosine": 0.8086109320148909,
3077
+ "eval_sts-dev_spearman_dot": 0.804234875942229,
3078
+ "eval_sts-dev_spearman_euclidean": 0.8084185226972233,
3079
+ "eval_sts-dev_spearman_manhattan": 0.8088695507249705,
3080
+ "eval_sts-dev_spearman_max": 0.8088695507249705,
3081
+ "step": 4000
3082
+ }
3083
+ ],
3084
+ "logging_steps": 10,
3085
+ "max_steps": 5884,
3086
+ "num_input_tokens_seen": 0,
3087
+ "num_train_epochs": 1,
3088
+ "save_steps": 500,
3089
+ "stateful_callbacks": {
3090
+ "TrainerControl": {
3091
+ "args": {
3092
+ "should_epoch_stop": false,
3093
+ "should_evaluate": false,
3094
+ "should_log": false,
3095
+ "should_save": true,
3096
+ "should_training_stop": false
3097
+ },
3098
+ "attributes": {}
3099
+ }
3100
+ },
3101
+ "total_flos": 0.0,
3102
+ "train_batch_size": 128,
3103
+ "trial_name": null,
3104
+ "trial_params": null
3105
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff73f331abc247df1bb895d7a7aef82db9f32fbce58b78521eec8094f43a2457
3
+ size 5496
vocab.txt ADDED
The diff for this file is too large to render. See raw diff