SKNahin commited on
Commit
96eaf59
·
verified ·
1 Parent(s): 9ab72de

Training in progress, step 150

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/azureuser/Desktop/sk/translator/saves-2/BanglaQwen/translator-cont/checkpoint-785",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 131072,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 14,
17
+ "num_hidden_layers": 24,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": null,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.49.0",
26
+ "use_cache": false,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 194498
29
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10bb44ee6be697598c52ea185883fb9d39d477d649666a81c0a4d4d2ce946df9
3
+ size 1064369000
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8ec123df4ff5fd027294ce9931a2709a061268c399250056437627ea20e304f
3
+ size 27868597
tokenizer_config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "11370": {
5
+ "content": "/************************************************************************",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "16395": {
13
+ "content": "%%%%%%%%%%%%%%%%",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "33009": {
21
+ "content": "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "65080": {
29
+ "content": "//************************************************************************",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151643": {
37
+ "content": "<|endoftext|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151644": {
45
+ "content": "<|im_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151645": {
53
+ "content": "<|im_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ }
60
+ },
61
+ "additional_special_tokens": [
62
+ "<|im_start|>",
63
+ "<|im_end|>"
64
+ ],
65
+ "bos_token": null,
66
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are BanglaQwen, developed by AI4BD. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within /************************************************************************//************************************************************************ XML tags:\\n/************************************************************************\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n//************************************************************************<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are BanglaQwen, developed by AI4BD. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n/************************************************************************\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n//************************************************************************' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n%%%%%%%%%%%%%%%%\\n' }}\n {{- message.content }}\n {{- '\\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
67
+ "clean_up_tokenization_spaces": false,
68
+ "eos_token": "<|im_end|>",
69
+ "errors": "replace",
70
+ "extra_special_tokens": {},
71
+ "model_max_length": 4096,
72
+ "pad_token": "<|endoftext|>",
73
+ "padding_side": "right",
74
+ "split_special_tokens": false,
75
+ "tokenizer_class": "Qwen2Tokenizer",
76
+ "unk_token": null
77
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 4354, "loss": 1.3804, "lr": 1.0000000000000002e-06, "epoch": 0.0004592000734720118, "percentage": 0.02, "elapsed_time": "0:00:20", "remaining_time": "1 day, 0:57:58"}
2
+ {"current_steps": 2, "total_steps": 4354, "loss": 1.3895, "lr": 2.0000000000000003e-06, "epoch": 0.0009184001469440236, "percentage": 0.05, "elapsed_time": "0:00:27", "remaining_time": "16:49:38"}
3
+ {"current_steps": 3, "total_steps": 4354, "loss": 1.2751, "lr": 3e-06, "epoch": 0.0013776002204160353, "percentage": 0.07, "elapsed_time": "0:00:31", "remaining_time": "12:49:11"}
4
+ {"current_steps": 4, "total_steps": 4354, "loss": 1.3427, "lr": 4.000000000000001e-06, "epoch": 0.0018368002938880471, "percentage": 0.09, "elapsed_time": "0:00:35", "remaining_time": "10:44:28"}
5
+ {"current_steps": 5, "total_steps": 4354, "loss": 1.3587, "lr": 5e-06, "epoch": 0.002296000367360059, "percentage": 0.11, "elapsed_time": "0:00:39", "remaining_time": "9:28:12"}
6
+ {"current_steps": 6, "total_steps": 4354, "loss": 1.3953, "lr": 6e-06, "epoch": 0.0027552004408320707, "percentage": 0.14, "elapsed_time": "0:00:42", "remaining_time": "8:37:41"}
7
+ {"current_steps": 7, "total_steps": 4354, "loss": 1.4018, "lr": 7.000000000000001e-06, "epoch": 0.0032144005143040825, "percentage": 0.16, "elapsed_time": "0:00:46", "remaining_time": "7:59:36"}
8
+ {"current_steps": 8, "total_steps": 4354, "loss": 1.4032, "lr": 8.000000000000001e-06, "epoch": 0.0036736005877760942, "percentage": 0.18, "elapsed_time": "0:00:49", "remaining_time": "7:31:11"}
9
+ {"current_steps": 9, "total_steps": 4354, "loss": 1.4232, "lr": 9e-06, "epoch": 0.004132800661248106, "percentage": 0.21, "elapsed_time": "0:00:53", "remaining_time": "7:10:58"}
10
+ {"current_steps": 10, "total_steps": 4354, "loss": 1.2924, "lr": 1e-05, "epoch": 0.004592000734720118, "percentage": 0.23, "elapsed_time": "0:00:57", "remaining_time": "6:54:30"}
11
+ {"current_steps": 11, "total_steps": 4354, "loss": 1.3079, "lr": 1.1000000000000001e-05, "epoch": 0.005051200808192129, "percentage": 0.25, "elapsed_time": "0:01:00", "remaining_time": "6:40:40"}
12
+ {"current_steps": 12, "total_steps": 4354, "loss": 1.2329, "lr": 1.2e-05, "epoch": 0.005510400881664141, "percentage": 0.28, "elapsed_time": "0:01:04", "remaining_time": "6:28:55"}
13
+ {"current_steps": 13, "total_steps": 4354, "loss": 1.309, "lr": 1.3000000000000001e-05, "epoch": 0.005969600955136153, "percentage": 0.3, "elapsed_time": "0:01:08", "remaining_time": "6:19:46"}
14
+ {"current_steps": 14, "total_steps": 4354, "loss": 1.4139, "lr": 1.4000000000000001e-05, "epoch": 0.006428801028608165, "percentage": 0.32, "elapsed_time": "0:01:12", "remaining_time": "6:12:14"}
15
+ {"current_steps": 15, "total_steps": 4354, "loss": 1.3265, "lr": 1.5e-05, "epoch": 0.006888001102080176, "percentage": 0.34, "elapsed_time": "0:01:15", "remaining_time": "6:04:32"}
16
+ {"current_steps": 16, "total_steps": 4354, "loss": 1.3244, "lr": 1.6000000000000003e-05, "epoch": 0.0073472011755521885, "percentage": 0.37, "elapsed_time": "0:01:19", "remaining_time": "5:59:04"}
17
+ {"current_steps": 17, "total_steps": 4354, "loss": 1.4398, "lr": 1.7000000000000003e-05, "epoch": 0.0078064012490242, "percentage": 0.39, "elapsed_time": "0:01:23", "remaining_time": "5:53:19"}
18
+ {"current_steps": 18, "total_steps": 4354, "loss": 1.3425, "lr": 1.8e-05, "epoch": 0.008265601322496211, "percentage": 0.41, "elapsed_time": "0:01:26", "remaining_time": "5:48:28"}
19
+ {"current_steps": 19, "total_steps": 4354, "loss": 1.3846, "lr": 1.9e-05, "epoch": 0.008724801395968223, "percentage": 0.44, "elapsed_time": "0:01:30", "remaining_time": "5:44:19"}
20
+ {"current_steps": 20, "total_steps": 4354, "loss": 1.3338, "lr": 2e-05, "epoch": 0.009184001469440236, "percentage": 0.46, "elapsed_time": "0:01:34", "remaining_time": "5:39:51"}
21
+ {"current_steps": 21, "total_steps": 4354, "loss": 1.3299, "lr": 2.1e-05, "epoch": 0.009643201542912247, "percentage": 0.48, "elapsed_time": "0:01:37", "remaining_time": "5:36:15"}
22
+ {"current_steps": 22, "total_steps": 4354, "loss": 1.3151, "lr": 2.2000000000000003e-05, "epoch": 0.010102401616384258, "percentage": 0.51, "elapsed_time": "0:01:41", "remaining_time": "5:33:02"}
23
+ {"current_steps": 23, "total_steps": 4354, "loss": 1.2471, "lr": 2.3000000000000003e-05, "epoch": 0.01056160168985627, "percentage": 0.53, "elapsed_time": "0:01:45", "remaining_time": "5:30:52"}
24
+ {"current_steps": 24, "total_steps": 4354, "loss": 1.2738, "lr": 2.4e-05, "epoch": 0.011020801763328283, "percentage": 0.55, "elapsed_time": "0:01:49", "remaining_time": "5:27:54"}
25
+ {"current_steps": 25, "total_steps": 4354, "loss": 1.4082, "lr": 2.5e-05, "epoch": 0.011480001836800294, "percentage": 0.57, "elapsed_time": "0:01:52", "remaining_time": "5:25:14"}
26
+ {"current_steps": 26, "total_steps": 4354, "loss": 1.2626, "lr": 2.6000000000000002e-05, "epoch": 0.011939201910272305, "percentage": 0.6, "elapsed_time": "0:01:56", "remaining_time": "5:23:28"}
27
+ {"current_steps": 27, "total_steps": 4354, "loss": 1.3043, "lr": 2.7000000000000002e-05, "epoch": 0.012398401983744317, "percentage": 0.62, "elapsed_time": "0:02:00", "remaining_time": "5:21:20"}
28
+ {"current_steps": 28, "total_steps": 4354, "loss": 1.3183, "lr": 2.8000000000000003e-05, "epoch": 0.01285760205721633, "percentage": 0.64, "elapsed_time": "0:02:04", "remaining_time": "5:19:24"}
29
+ {"current_steps": 29, "total_steps": 4354, "loss": 1.3234, "lr": 2.9e-05, "epoch": 0.013316802130688341, "percentage": 0.67, "elapsed_time": "0:02:07", "remaining_time": "5:17:22"}
30
+ {"current_steps": 30, "total_steps": 4354, "loss": 1.2934, "lr": 3e-05, "epoch": 0.013776002204160353, "percentage": 0.69, "elapsed_time": "0:02:11", "remaining_time": "5:15:39"}
31
+ {"current_steps": 31, "total_steps": 4354, "loss": 1.32, "lr": 3.1e-05, "epoch": 0.014235202277632364, "percentage": 0.71, "elapsed_time": "0:02:15", "remaining_time": "5:14:11"}
32
+ {"current_steps": 32, "total_steps": 4354, "loss": 1.3079, "lr": 3.2000000000000005e-05, "epoch": 0.014694402351104377, "percentage": 0.73, "elapsed_time": "0:02:18", "remaining_time": "5:12:45"}
33
+ {"current_steps": 33, "total_steps": 4354, "loss": 1.3176, "lr": 3.3e-05, "epoch": 0.015153602424576388, "percentage": 0.76, "elapsed_time": "0:02:22", "remaining_time": "5:11:13"}
34
+ {"current_steps": 34, "total_steps": 4354, "loss": 1.3174, "lr": 3.4000000000000007e-05, "epoch": 0.0156128024980484, "percentage": 0.78, "elapsed_time": "0:02:26", "remaining_time": "5:09:39"}
35
+ {"current_steps": 35, "total_steps": 4354, "loss": 1.2815, "lr": 3.5e-05, "epoch": 0.016072002571520413, "percentage": 0.8, "elapsed_time": "0:02:30", "remaining_time": "5:08:39"}
36
+ {"current_steps": 36, "total_steps": 4354, "loss": 1.297, "lr": 3.6e-05, "epoch": 0.016531202644992422, "percentage": 0.83, "elapsed_time": "0:02:33", "remaining_time": "5:07:46"}
37
+ {"current_steps": 37, "total_steps": 4354, "loss": 1.3368, "lr": 3.7e-05, "epoch": 0.016990402718464435, "percentage": 0.85, "elapsed_time": "0:02:37", "remaining_time": "5:06:16"}
38
+ {"current_steps": 38, "total_steps": 4354, "loss": 1.3747, "lr": 3.8e-05, "epoch": 0.017449602791936445, "percentage": 0.87, "elapsed_time": "0:02:41", "remaining_time": "5:05:16"}
39
+ {"current_steps": 39, "total_steps": 4354, "loss": 1.3036, "lr": 3.9000000000000006e-05, "epoch": 0.017908802865408458, "percentage": 0.9, "elapsed_time": "0:02:45", "remaining_time": "5:04:21"}
40
+ {"current_steps": 40, "total_steps": 4354, "loss": 1.3682, "lr": 4e-05, "epoch": 0.01836800293888047, "percentage": 0.92, "elapsed_time": "0:02:48", "remaining_time": "5:03:15"}
41
+ {"current_steps": 41, "total_steps": 4354, "loss": 1.3134, "lr": 4.1e-05, "epoch": 0.01882720301235248, "percentage": 0.94, "elapsed_time": "0:02:54", "remaining_time": "5:05:16"}
42
+ {"current_steps": 42, "total_steps": 4354, "loss": 1.3683, "lr": 4.2e-05, "epoch": 0.019286403085824494, "percentage": 0.96, "elapsed_time": "0:02:57", "remaining_time": "5:04:12"}
43
+ {"current_steps": 43, "total_steps": 4354, "loss": 1.3823, "lr": 4.3e-05, "epoch": 0.019745603159296507, "percentage": 0.99, "elapsed_time": "0:03:01", "remaining_time": "5:03:24"}
44
+ {"current_steps": 44, "total_steps": 4354, "loss": 1.3199, "lr": 4.4000000000000006e-05, "epoch": 0.020204803232768517, "percentage": 1.01, "elapsed_time": "0:03:06", "remaining_time": "5:03:43"}
45
+ {"current_steps": 45, "total_steps": 4354, "loss": 1.3567, "lr": 4.5e-05, "epoch": 0.02066400330624053, "percentage": 1.03, "elapsed_time": "0:03:09", "remaining_time": "5:03:09"}
46
+ {"current_steps": 46, "total_steps": 4354, "loss": 1.4144, "lr": 4.600000000000001e-05, "epoch": 0.02112320337971254, "percentage": 1.06, "elapsed_time": "0:03:13", "remaining_time": "5:02:27"}
47
+ {"current_steps": 47, "total_steps": 4354, "loss": 1.2648, "lr": 4.7e-05, "epoch": 0.021582403453184552, "percentage": 1.08, "elapsed_time": "0:03:17", "remaining_time": "5:01:58"}
48
+ {"current_steps": 48, "total_steps": 4354, "loss": 1.4216, "lr": 4.8e-05, "epoch": 0.022041603526656565, "percentage": 1.1, "elapsed_time": "0:03:21", "remaining_time": "5:01:19"}
49
+ {"current_steps": 49, "total_steps": 4354, "loss": 1.3791, "lr": 4.9e-05, "epoch": 0.022500803600128575, "percentage": 1.13, "elapsed_time": "0:03:25", "remaining_time": "5:00:29"}
50
+ {"current_steps": 50, "total_steps": 4354, "loss": 1.3768, "lr": 5e-05, "epoch": 0.022960003673600588, "percentage": 1.15, "elapsed_time": "0:03:29", "remaining_time": "4:59:56"}
51
+ {"current_steps": 51, "total_steps": 4354, "loss": 1.3168, "lr": 4.9999993340138253e-05, "epoch": 0.0234192037470726, "percentage": 1.17, "elapsed_time": "0:03:32", "remaining_time": "4:59:20"}
52
+ {"current_steps": 52, "total_steps": 4354, "loss": 1.3193, "lr": 4.999997336055656e-05, "epoch": 0.02387840382054461, "percentage": 1.19, "elapsed_time": "0:03:36", "remaining_time": "4:58:30"}
53
+ {"current_steps": 53, "total_steps": 4354, "loss": 1.3256, "lr": 4.999994006126555e-05, "epoch": 0.024337603894016624, "percentage": 1.22, "elapsed_time": "0:03:40", "remaining_time": "4:58:44"}
54
+ {"current_steps": 54, "total_steps": 4354, "loss": 1.3745, "lr": 4.9999893442282986e-05, "epoch": 0.024796803967488634, "percentage": 1.24, "elapsed_time": "0:03:44", "remaining_time": "4:57:54"}
55
+ {"current_steps": 55, "total_steps": 4354, "loss": 1.3414, "lr": 4.99998335036337e-05, "epoch": 0.025256004040960647, "percentage": 1.26, "elapsed_time": "0:03:48", "remaining_time": "4:57:15"}
56
+ {"current_steps": 56, "total_steps": 4354, "loss": 1.3981, "lr": 4.999976024534962e-05, "epoch": 0.02571520411443266, "percentage": 1.29, "elapsed_time": "0:03:52", "remaining_time": "4:56:47"}
57
+ {"current_steps": 57, "total_steps": 4354, "loss": 1.4594, "lr": 4.9999673667469783e-05, "epoch": 0.02617440418790467, "percentage": 1.31, "elapsed_time": "0:03:55", "remaining_time": "4:56:10"}
58
+ {"current_steps": 58, "total_steps": 4354, "loss": 1.3603, "lr": 4.999957377004031e-05, "epoch": 0.026633604261376682, "percentage": 1.33, "elapsed_time": "0:03:59", "remaining_time": "4:55:40"}
59
+ {"current_steps": 59, "total_steps": 4354, "loss": 1.3368, "lr": 4.999946055311444e-05, "epoch": 0.027092804334848692, "percentage": 1.36, "elapsed_time": "0:04:03", "remaining_time": "4:55:03"}
60
+ {"current_steps": 60, "total_steps": 4354, "loss": 1.3206, "lr": 4.9999334016752476e-05, "epoch": 0.027552004408320705, "percentage": 1.38, "elapsed_time": "0:04:07", "remaining_time": "4:54:43"}
61
+ {"current_steps": 61, "total_steps": 4354, "loss": 1.3657, "lr": 4.999919416102184e-05, "epoch": 0.028011204481792718, "percentage": 1.4, "elapsed_time": "0:04:10", "remaining_time": "4:54:03"}
62
+ {"current_steps": 62, "total_steps": 4354, "loss": 1.4203, "lr": 4.9999040985997054e-05, "epoch": 0.028470404555264728, "percentage": 1.42, "elapsed_time": "0:04:14", "remaining_time": "4:53:27"}
63
+ {"current_steps": 63, "total_steps": 4354, "loss": 1.3436, "lr": 4.999887449175972e-05, "epoch": 0.02892960462873674, "percentage": 1.45, "elapsed_time": "0:04:18", "remaining_time": "4:52:58"}
64
+ {"current_steps": 64, "total_steps": 4354, "loss": 1.3111, "lr": 4.999869467839855e-05, "epoch": 0.029388804702208754, "percentage": 1.47, "elapsed_time": "0:04:21", "remaining_time": "4:52:36"}
65
+ {"current_steps": 65, "total_steps": 4354, "loss": 1.3866, "lr": 4.999850154600934e-05, "epoch": 0.029848004775680764, "percentage": 1.49, "elapsed_time": "0:04:25", "remaining_time": "4:52:06"}
66
+ {"current_steps": 66, "total_steps": 4354, "loss": 1.2843, "lr": 4.999829509469499e-05, "epoch": 0.030307204849152777, "percentage": 1.52, "elapsed_time": "0:04:29", "remaining_time": "4:51:39"}
67
+ {"current_steps": 67, "total_steps": 4354, "loss": 1.3595, "lr": 4.9998075324565505e-05, "epoch": 0.030766404922624786, "percentage": 1.54, "elapsed_time": "0:04:33", "remaining_time": "4:51:21"}
68
+ {"current_steps": 68, "total_steps": 4354, "loss": 1.3464, "lr": 4.999784223573797e-05, "epoch": 0.0312256049960968, "percentage": 1.56, "elapsed_time": "0:04:37", "remaining_time": "4:50:59"}
69
+ {"current_steps": 69, "total_steps": 4354, "loss": 1.3117, "lr": 4.999759582833656e-05, "epoch": 0.03168480506956881, "percentage": 1.58, "elapsed_time": "0:04:40", "remaining_time": "4:50:35"}
70
+ {"current_steps": 70, "total_steps": 4354, "loss": 1.4115, "lr": 4.9997336102492574e-05, "epoch": 0.032144005143040826, "percentage": 1.61, "elapsed_time": "0:04:44", "remaining_time": "4:50:11"}
71
+ {"current_steps": 71, "total_steps": 4354, "loss": 1.3594, "lr": 4.999706305834438e-05, "epoch": 0.03260320521651283, "percentage": 1.63, "elapsed_time": "0:04:48", "remaining_time": "4:49:53"}
72
+ {"current_steps": 72, "total_steps": 4354, "loss": 1.3219, "lr": 4.9996776696037476e-05, "epoch": 0.033062405289984845, "percentage": 1.65, "elapsed_time": "0:04:52", "remaining_time": "4:49:54"}
73
+ {"current_steps": 73, "total_steps": 4354, "loss": 1.418, "lr": 4.99964770157244e-05, "epoch": 0.03352160536345686, "percentage": 1.68, "elapsed_time": "0:04:56", "remaining_time": "4:49:35"}
74
+ {"current_steps": 74, "total_steps": 4354, "loss": 1.2797, "lr": 4.9996164017564837e-05, "epoch": 0.03398080543692887, "percentage": 1.7, "elapsed_time": "0:05:00", "remaining_time": "4:49:15"}
75
+ {"current_steps": 75, "total_steps": 4354, "loss": 1.2866, "lr": 4.9995837701725544e-05, "epoch": 0.034440005510400884, "percentage": 1.72, "elapsed_time": "0:05:04", "remaining_time": "4:49:17"}
76
+ {"current_steps": 76, "total_steps": 4354, "loss": 1.3651, "lr": 4.9995498068380374e-05, "epoch": 0.03489920558387289, "percentage": 1.75, "elapsed_time": "0:05:07", "remaining_time": "4:48:52"}
77
+ {"current_steps": 77, "total_steps": 4354, "loss": 1.3065, "lr": 4.999514511771029e-05, "epoch": 0.0353584056573449, "percentage": 1.77, "elapsed_time": "0:05:11", "remaining_time": "4:48:25"}
78
+ {"current_steps": 78, "total_steps": 4354, "loss": 1.4687, "lr": 4.999477884990334e-05, "epoch": 0.035817605730816916, "percentage": 1.79, "elapsed_time": "0:05:15", "remaining_time": "4:47:57"}
79
+ {"current_steps": 79, "total_steps": 4354, "loss": 1.3369, "lr": 4.9994399265154666e-05, "epoch": 0.03627680580428893, "percentage": 1.81, "elapsed_time": "0:05:19", "remaining_time": "4:47:56"}
80
+ {"current_steps": 80, "total_steps": 4354, "loss": 1.3576, "lr": 4.99940063636665e-05, "epoch": 0.03673600587776094, "percentage": 1.84, "elapsed_time": "0:05:23", "remaining_time": "4:47:48"}
81
+ {"current_steps": 81, "total_steps": 4354, "loss": 1.4062, "lr": 4.999360014564819e-05, "epoch": 0.037195205951232956, "percentage": 1.86, "elapsed_time": "0:05:26", "remaining_time": "4:47:22"}
82
+ {"current_steps": 82, "total_steps": 4354, "loss": 1.3121, "lr": 4.999318061131614e-05, "epoch": 0.03765440602470496, "percentage": 1.88, "elapsed_time": "0:05:30", "remaining_time": "4:47:03"}
83
+ {"current_steps": 83, "total_steps": 4354, "loss": 1.3783, "lr": 4.999274776089389e-05, "epoch": 0.038113606098176975, "percentage": 1.91, "elapsed_time": "0:05:34", "remaining_time": "4:46:36"}
84
+ {"current_steps": 84, "total_steps": 4354, "loss": 1.3433, "lr": 4.9992301594612055e-05, "epoch": 0.03857280617164899, "percentage": 1.93, "elapsed_time": "0:05:37", "remaining_time": "4:46:09"}
85
+ {"current_steps": 85, "total_steps": 4354, "loss": 1.4302, "lr": 4.999184211270835e-05, "epoch": 0.039032006245121, "percentage": 1.95, "elapsed_time": "0:05:41", "remaining_time": "4:45:52"}
86
+ {"current_steps": 86, "total_steps": 4354, "loss": 1.3039, "lr": 4.999136931542758e-05, "epoch": 0.039491206318593014, "percentage": 1.98, "elapsed_time": "0:05:45", "remaining_time": "4:45:40"}
87
+ {"current_steps": 87, "total_steps": 4354, "loss": 1.3584, "lr": 4.999088320302166e-05, "epoch": 0.03995040639206502, "percentage": 2.0, "elapsed_time": "0:05:49", "remaining_time": "4:45:45"}
88
+ {"current_steps": 88, "total_steps": 4354, "loss": 1.2915, "lr": 4.999038377574955e-05, "epoch": 0.04040960646553703, "percentage": 2.02, "elapsed_time": "0:05:53", "remaining_time": "4:45:24"}
89
+ {"current_steps": 89, "total_steps": 4354, "loss": 1.4106, "lr": 4.998987103387737e-05, "epoch": 0.040868806539009046, "percentage": 2.04, "elapsed_time": "0:05:56", "remaining_time": "4:45:02"}
90
+ {"current_steps": 90, "total_steps": 4354, "loss": 1.3344, "lr": 4.9989344977678285e-05, "epoch": 0.04132800661248106, "percentage": 2.07, "elapsed_time": "0:06:00", "remaining_time": "4:44:43"}
91
+ {"current_steps": 91, "total_steps": 4354, "loss": 1.2863, "lr": 4.998880560743259e-05, "epoch": 0.04178720668595307, "percentage": 2.09, "elapsed_time": "0:06:04", "remaining_time": "4:44:27"}
92
+ {"current_steps": 92, "total_steps": 4354, "loss": 1.3271, "lr": 4.9988252923427634e-05, "epoch": 0.04224640675942508, "percentage": 2.11, "elapsed_time": "0:06:08", "remaining_time": "4:44:13"}
93
+ {"current_steps": 93, "total_steps": 4354, "loss": 1.3344, "lr": 4.998768692595791e-05, "epoch": 0.04270560683289709, "percentage": 2.14, "elapsed_time": "0:06:11", "remaining_time": "4:43:56"}
94
+ {"current_steps": 94, "total_steps": 4354, "loss": 1.2922, "lr": 4.9987107615324944e-05, "epoch": 0.043164806906369105, "percentage": 2.16, "elapsed_time": "0:06:15", "remaining_time": "4:43:41"}
95
+ {"current_steps": 95, "total_steps": 4354, "loss": 1.4438, "lr": 4.998651499183741e-05, "epoch": 0.04362400697984112, "percentage": 2.18, "elapsed_time": "0:06:19", "remaining_time": "4:43:24"}
96
+ {"current_steps": 96, "total_steps": 4354, "loss": 1.3937, "lr": 4.998590905581104e-05, "epoch": 0.04408320705331313, "percentage": 2.2, "elapsed_time": "0:06:23", "remaining_time": "4:43:15"}
97
+ {"current_steps": 97, "total_steps": 4354, "loss": 1.3546, "lr": 4.9985289807568666e-05, "epoch": 0.04454240712678514, "percentage": 2.23, "elapsed_time": "0:06:26", "remaining_time": "4:42:58"}
98
+ {"current_steps": 98, "total_steps": 4354, "loss": 1.4719, "lr": 4.998465724744022e-05, "epoch": 0.04500160720025715, "percentage": 2.25, "elapsed_time": "0:06:30", "remaining_time": "4:42:43"}
99
+ {"current_steps": 99, "total_steps": 4354, "loss": 1.3202, "lr": 4.998401137576274e-05, "epoch": 0.04546080727372916, "percentage": 2.27, "elapsed_time": "0:06:34", "remaining_time": "4:42:33"}
100
+ {"current_steps": 100, "total_steps": 4354, "loss": 1.3113, "lr": 4.998335219288032e-05, "epoch": 0.045920007347201176, "percentage": 2.3, "elapsed_time": "0:06:38", "remaining_time": "4:42:15"}
101
+ {"current_steps": 101, "total_steps": 4354, "loss": 1.4094, "lr": 4.998267969914417e-05, "epoch": 0.04637920742067319, "percentage": 2.32, "elapsed_time": "0:06:41", "remaining_time": "4:41:59"}
102
+ {"current_steps": 102, "total_steps": 4354, "loss": 1.3257, "lr": 4.998199389491257e-05, "epoch": 0.0468384074941452, "percentage": 2.34, "elapsed_time": "0:06:45", "remaining_time": "4:41:47"}
103
+ {"current_steps": 103, "total_steps": 4354, "loss": 1.2924, "lr": 4.998129478055095e-05, "epoch": 0.04729760756761721, "percentage": 2.37, "elapsed_time": "0:06:49", "remaining_time": "4:41:37"}
104
+ {"current_steps": 104, "total_steps": 4354, "loss": 1.4036, "lr": 4.998058235643175e-05, "epoch": 0.04775680764108922, "percentage": 2.39, "elapsed_time": "0:06:53", "remaining_time": "4:41:18"}
105
+ {"current_steps": 105, "total_steps": 4354, "loss": 1.2819, "lr": 4.9979856622934564e-05, "epoch": 0.048216007714561235, "percentage": 2.41, "elapsed_time": "0:06:57", "remaining_time": "4:41:19"}
106
+ {"current_steps": 106, "total_steps": 4354, "loss": 1.3171, "lr": 4.997911758044605e-05, "epoch": 0.04867520778803325, "percentage": 2.43, "elapsed_time": "0:07:00", "remaining_time": "4:41:04"}
107
+ {"current_steps": 107, "total_steps": 4354, "loss": 1.3574, "lr": 4.997836522935996e-05, "epoch": 0.04913440786150526, "percentage": 2.46, "elapsed_time": "0:07:04", "remaining_time": "4:40:44"}
108
+ {"current_steps": 108, "total_steps": 4354, "loss": 1.2778, "lr": 4.997759957007714e-05, "epoch": 0.04959360793497727, "percentage": 2.48, "elapsed_time": "0:07:08", "remaining_time": "4:40:51"}
109
+ {"current_steps": 109, "total_steps": 4354, "loss": 1.3896, "lr": 4.997682060300553e-05, "epoch": 0.05005280800844928, "percentage": 2.5, "elapsed_time": "0:07:12", "remaining_time": "4:40:44"}
110
+ {"current_steps": 110, "total_steps": 4354, "loss": 1.402, "lr": 4.997602832856013e-05, "epoch": 0.05051200808192129, "percentage": 2.53, "elapsed_time": "0:07:16", "remaining_time": "4:40:40"}
111
+ {"current_steps": 111, "total_steps": 4354, "loss": 1.3787, "lr": 4.997522274716309e-05, "epoch": 0.050971208155393306, "percentage": 2.55, "elapsed_time": "0:07:20", "remaining_time": "4:40:42"}
112
+ {"current_steps": 112, "total_steps": 4354, "loss": 1.3412, "lr": 4.997440385924359e-05, "epoch": 0.05143040822886532, "percentage": 2.57, "elapsed_time": "0:07:24", "remaining_time": "4:40:29"}
113
+ {"current_steps": 113, "total_steps": 4354, "loss": 1.2783, "lr": 4.997357166523794e-05, "epoch": 0.051889608302337326, "percentage": 2.6, "elapsed_time": "0:07:28", "remaining_time": "4:40:35"}
114
+ {"current_steps": 114, "total_steps": 4354, "loss": 1.3426, "lr": 4.9972726165589515e-05, "epoch": 0.05234880837580934, "percentage": 2.62, "elapsed_time": "0:07:32", "remaining_time": "4:40:17"}
115
+ {"current_steps": 115, "total_steps": 4354, "loss": 1.3072, "lr": 4.997186736074879e-05, "epoch": 0.05280800844928135, "percentage": 2.64, "elapsed_time": "0:07:35", "remaining_time": "4:40:04"}
116
+ {"current_steps": 116, "total_steps": 4354, "loss": 1.3492, "lr": 4.997099525117332e-05, "epoch": 0.053267208522753365, "percentage": 2.66, "elapsed_time": "0:07:39", "remaining_time": "4:39:52"}
117
+ {"current_steps": 117, "total_steps": 4354, "loss": 1.395, "lr": 4.9970109837327775e-05, "epoch": 0.05372640859622538, "percentage": 2.69, "elapsed_time": "0:07:44", "remaining_time": "4:40:30"}
118
+ {"current_steps": 118, "total_steps": 4354, "loss": 1.4371, "lr": 4.996921111968387e-05, "epoch": 0.054185608669697384, "percentage": 2.71, "elapsed_time": "0:07:48", "remaining_time": "4:40:15"}
119
+ {"current_steps": 119, "total_steps": 4354, "loss": 1.343, "lr": 4.996829909872045e-05, "epoch": 0.0546448087431694, "percentage": 2.73, "elapsed_time": "0:07:52", "remaining_time": "4:40:01"}
120
+ {"current_steps": 120, "total_steps": 4354, "loss": 1.2427, "lr": 4.996737377492342e-05, "epoch": 0.05510400881664141, "percentage": 2.76, "elapsed_time": "0:07:56", "remaining_time": "4:40:17"}
121
+ {"current_steps": 121, "total_steps": 4354, "loss": 1.2891, "lr": 4.996643514878578e-05, "epoch": 0.05556320889011342, "percentage": 2.78, "elapsed_time": "0:08:00", "remaining_time": "4:39:59"}
122
+ {"current_steps": 122, "total_steps": 4354, "loss": 1.3337, "lr": 4.9965483220807627e-05, "epoch": 0.056022408963585436, "percentage": 2.8, "elapsed_time": "0:08:03", "remaining_time": "4:39:42"}
123
+ {"current_steps": 123, "total_steps": 4354, "loss": 1.3042, "lr": 4.9964517991496137e-05, "epoch": 0.05648160903705744, "percentage": 2.82, "elapsed_time": "0:08:07", "remaining_time": "4:39:41"}
124
+ {"current_steps": 124, "total_steps": 4354, "loss": 1.4047, "lr": 4.996353946136557e-05, "epoch": 0.056940809110529456, "percentage": 2.85, "elapsed_time": "0:08:11", "remaining_time": "4:39:26"}
125
+ {"current_steps": 125, "total_steps": 4354, "loss": 1.317, "lr": 4.9962547630937275e-05, "epoch": 0.05740000918400147, "percentage": 2.87, "elapsed_time": "0:08:15", "remaining_time": "4:39:14"}
126
+ {"current_steps": 126, "total_steps": 4354, "loss": 1.3812, "lr": 4.996154250073969e-05, "epoch": 0.05785920925747348, "percentage": 2.89, "elapsed_time": "0:08:19", "remaining_time": "4:39:05"}
127
+ {"current_steps": 127, "total_steps": 4354, "loss": 1.3448, "lr": 4.996052407130833e-05, "epoch": 0.058318409330945495, "percentage": 2.92, "elapsed_time": "0:08:22", "remaining_time": "4:38:59"}
128
+ {"current_steps": 128, "total_steps": 4354, "loss": 1.2553, "lr": 4.9959492343185834e-05, "epoch": 0.05877760940441751, "percentage": 2.94, "elapsed_time": "0:08:26", "remaining_time": "4:38:45"}
129
+ {"current_steps": 129, "total_steps": 4354, "loss": 1.2989, "lr": 4.995844731692185e-05, "epoch": 0.059236809477889514, "percentage": 2.96, "elapsed_time": "0:08:30", "remaining_time": "4:38:32"}
130
+ {"current_steps": 130, "total_steps": 4354, "loss": 1.3442, "lr": 4.995738899307319e-05, "epoch": 0.05969600955136153, "percentage": 2.99, "elapsed_time": "0:08:34", "remaining_time": "4:38:21"}
131
+ {"current_steps": 131, "total_steps": 4354, "loss": 1.3869, "lr": 4.99563173722037e-05, "epoch": 0.06015520962483354, "percentage": 3.01, "elapsed_time": "0:08:37", "remaining_time": "4:38:16"}
132
+ {"current_steps": 132, "total_steps": 4354, "loss": 1.4096, "lr": 4.995523245488434e-05, "epoch": 0.06061440969830555, "percentage": 3.03, "elapsed_time": "0:08:41", "remaining_time": "4:38:06"}
133
+ {"current_steps": 133, "total_steps": 4354, "loss": 1.3141, "lr": 4.995413424169313e-05, "epoch": 0.061073609771777566, "percentage": 3.05, "elapsed_time": "0:08:45", "remaining_time": "4:37:56"}
134
+ {"current_steps": 134, "total_steps": 4354, "loss": 1.3398, "lr": 4.995302273321519e-05, "epoch": 0.06153280984524957, "percentage": 3.08, "elapsed_time": "0:08:49", "remaining_time": "4:37:54"}
135
+ {"current_steps": 135, "total_steps": 4354, "loss": 1.3507, "lr": 4.995189793004272e-05, "epoch": 0.061992009918721586, "percentage": 3.1, "elapsed_time": "0:08:53", "remaining_time": "4:37:46"}
136
+ {"current_steps": 136, "total_steps": 4354, "loss": 1.348, "lr": 4.995075983277501e-05, "epoch": 0.0624512099921936, "percentage": 3.12, "elapsed_time": "0:08:57", "remaining_time": "4:37:43"}
137
+ {"current_steps": 137, "total_steps": 4354, "loss": 1.3355, "lr": 4.994960844201841e-05, "epoch": 0.06291041006566561, "percentage": 3.15, "elapsed_time": "0:09:01", "remaining_time": "4:37:46"}
138
+ {"current_steps": 138, "total_steps": 4354, "loss": 1.4539, "lr": 4.9948443758386384e-05, "epoch": 0.06336961013913762, "percentage": 3.17, "elapsed_time": "0:09:04", "remaining_time": "4:37:29"}
139
+ {"current_steps": 139, "total_steps": 4354, "loss": 1.3626, "lr": 4.994726578249946e-05, "epoch": 0.06382881021260964, "percentage": 3.19, "elapsed_time": "0:09:08", "remaining_time": "4:37:24"}
140
+ {"current_steps": 140, "total_steps": 4354, "loss": 1.3372, "lr": 4.994607451498524e-05, "epoch": 0.06428801028608165, "percentage": 3.22, "elapsed_time": "0:09:12", "remaining_time": "4:37:15"}
141
+ {"current_steps": 141, "total_steps": 4354, "loss": 1.3572, "lr": 4.9944869956478425e-05, "epoch": 0.06474721035955366, "percentage": 3.24, "elapsed_time": "0:09:16", "remaining_time": "4:37:06"}
142
+ {"current_steps": 142, "total_steps": 4354, "loss": 1.2921, "lr": 4.994365210762079e-05, "epoch": 0.06520641043302566, "percentage": 3.26, "elapsed_time": "0:09:20", "remaining_time": "4:36:52"}
143
+ {"current_steps": 143, "total_steps": 4354, "loss": 1.3875, "lr": 4.9942420969061196e-05, "epoch": 0.06566561050649768, "percentage": 3.28, "elapsed_time": "0:09:23", "remaining_time": "4:36:38"}
144
+ {"current_steps": 144, "total_steps": 4354, "loss": 1.3841, "lr": 4.994117654145557e-05, "epoch": 0.06612481057996969, "percentage": 3.31, "elapsed_time": "0:09:27", "remaining_time": "4:36:24"}
145
+ {"current_steps": 145, "total_steps": 4354, "loss": 1.3194, "lr": 4.9939918825466935e-05, "epoch": 0.0665840106534417, "percentage": 3.33, "elapsed_time": "0:09:30", "remaining_time": "4:36:13"}
146
+ {"current_steps": 146, "total_steps": 4354, "loss": 1.3875, "lr": 4.993864782176539e-05, "epoch": 0.06704321072691372, "percentage": 3.35, "elapsed_time": "0:09:34", "remaining_time": "4:36:05"}
147
+ {"current_steps": 147, "total_steps": 4354, "loss": 1.3994, "lr": 4.99373635310281e-05, "epoch": 0.06750241080038573, "percentage": 3.38, "elapsed_time": "0:09:38", "remaining_time": "4:36:00"}
148
+ {"current_steps": 148, "total_steps": 4354, "loss": 1.2841, "lr": 4.993606595393935e-05, "epoch": 0.06796161087385774, "percentage": 3.4, "elapsed_time": "0:09:42", "remaining_time": "4:35:52"}
149
+ {"current_steps": 149, "total_steps": 4354, "loss": 1.3448, "lr": 4.993475509119044e-05, "epoch": 0.06842081094732975, "percentage": 3.42, "elapsed_time": "0:09:46", "remaining_time": "4:35:43"}
150
+ {"current_steps": 150, "total_steps": 4354, "loss": 1.4078, "lr": 4.99334309434798e-05, "epoch": 0.06888001102080177, "percentage": 3.45, "elapsed_time": "0:09:49", "remaining_time": "4:35:31"}
151
+ {"current_steps": 151, "total_steps": 4354, "loss": 1.3237, "lr": 4.993209351151292e-05, "epoch": 0.06933921109427378, "percentage": 3.47, "elapsed_time": "0:10:00", "remaining_time": "4:38:27"}
152
+ {"current_steps": 152, "total_steps": 4354, "loss": 1.2682, "lr": 4.993074279600237e-05, "epoch": 0.06979841116774578, "percentage": 3.49, "elapsed_time": "0:10:03", "remaining_time": "4:38:14"}
153
+ {"current_steps": 153, "total_steps": 4354, "loss": 1.3408, "lr": 4.9929378797667796e-05, "epoch": 0.0702576112412178, "percentage": 3.51, "elapsed_time": "0:10:07", "remaining_time": "4:38:08"}
154
+ {"current_steps": 154, "total_steps": 4354, "loss": 1.3021, "lr": 4.992800151723592e-05, "epoch": 0.0707168113146898, "percentage": 3.54, "elapsed_time": "0:10:11", "remaining_time": "4:37:58"}
155
+ {"current_steps": 155, "total_steps": 4354, "loss": 1.3997, "lr": 4.9926610955440545e-05, "epoch": 0.07117601138816182, "percentage": 3.56, "elapsed_time": "0:10:15", "remaining_time": "4:37:44"}
156
+ {"current_steps": 156, "total_steps": 4354, "loss": 1.269, "lr": 4.992520711302254e-05, "epoch": 0.07163521146163383, "percentage": 3.58, "elapsed_time": "0:10:19", "remaining_time": "4:37:37"}
157
+ {"current_steps": 157, "total_steps": 4354, "loss": 1.378, "lr": 4.992378999072986e-05, "epoch": 0.07209441153510585, "percentage": 3.61, "elapsed_time": "0:10:22", "remaining_time": "4:37:33"}
158
+ {"current_steps": 158, "total_steps": 4354, "loss": 1.3742, "lr": 4.992235958931753e-05, "epoch": 0.07255361160857786, "percentage": 3.63, "elapsed_time": "0:10:26", "remaining_time": "4:37:27"}
159
+ {"current_steps": 159, "total_steps": 4354, "loss": 1.3491, "lr": 4.992091590954766e-05, "epoch": 0.07301281168204987, "percentage": 3.65, "elapsed_time": "0:10:30", "remaining_time": "4:37:21"}
160
+ {"current_steps": 160, "total_steps": 4354, "loss": 1.3206, "lr": 4.991945895218941e-05, "epoch": 0.07347201175552188, "percentage": 3.67, "elapsed_time": "0:10:35", "remaining_time": "4:37:30"}
161
+ {"current_steps": 161, "total_steps": 4354, "loss": 1.3445, "lr": 4.991798871801906e-05, "epoch": 0.0739312118289939, "percentage": 3.7, "elapsed_time": "0:10:38", "remaining_time": "4:37:19"}
162
+ {"current_steps": 162, "total_steps": 4354, "loss": 1.3591, "lr": 4.99165052078199e-05, "epoch": 0.07439041190246591, "percentage": 3.72, "elapsed_time": "0:10:42", "remaining_time": "4:37:11"}
163
+ {"current_steps": 163, "total_steps": 4354, "loss": 1.3792, "lr": 4.991500842238235e-05, "epoch": 0.07484961197593791, "percentage": 3.74, "elapsed_time": "0:10:46", "remaining_time": "4:37:04"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7126ebeb2b705efb00b4d663513899e51c325af64373b542f9b6a38cd8b1597
3
+ size 7608
vocab.json ADDED
The diff for this file is too large to render. See raw diff