Training in progress, step 150
Browse files- .gitattributes +1 -0
- config.json +29 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +20 -0
- tokenizer.json +3 -0
- tokenizer_config.json +77 -0
- trainer_log.jsonl +163 -0
- training_args.bin +3 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/home/azureuser/Desktop/sk/translator/saves-2/BanglaQwen/translator-cont/checkpoint-785",
|
3 |
+
"architectures": [
|
4 |
+
"Qwen2ForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 151643,
|
8 |
+
"eos_token_id": 151643,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 896,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 4864,
|
13 |
+
"max_position_embeddings": 131072,
|
14 |
+
"max_window_layers": 24,
|
15 |
+
"model_type": "qwen2",
|
16 |
+
"num_attention_heads": 14,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"num_key_value_heads": 2,
|
19 |
+
"rms_norm_eps": 1e-06,
|
20 |
+
"rope_scaling": null,
|
21 |
+
"rope_theta": 1000000.0,
|
22 |
+
"sliding_window": null,
|
23 |
+
"tie_word_embeddings": true,
|
24 |
+
"torch_dtype": "bfloat16",
|
25 |
+
"transformers_version": "4.49.0",
|
26 |
+
"use_cache": false,
|
27 |
+
"use_sliding_window": false,
|
28 |
+
"vocab_size": 194498
|
29 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10bb44ee6be697598c52ea185883fb9d39d477d649666a81c0a4d4d2ce946df9
|
3 |
+
size 1064369000
|
special_tokens_map.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<|im_start|>",
|
4 |
+
"<|im_end|>"
|
5 |
+
],
|
6 |
+
"eos_token": {
|
7 |
+
"content": "<|im_end|>",
|
8 |
+
"lstrip": false,
|
9 |
+
"normalized": false,
|
10 |
+
"rstrip": false,
|
11 |
+
"single_word": false
|
12 |
+
},
|
13 |
+
"pad_token": {
|
14 |
+
"content": "<|endoftext|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false
|
19 |
+
}
|
20 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8ec123df4ff5fd027294ce9931a2709a061268c399250056437627ea20e304f
|
3 |
+
size 27868597
|
tokenizer_config.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"11370": {
|
5 |
+
"content": "/************************************************************************",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"16395": {
|
13 |
+
"content": "%%%%%%%%%%%%%%%%",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"33009": {
|
21 |
+
"content": "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"65080": {
|
29 |
+
"content": "//************************************************************************",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": false,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"151643": {
|
37 |
+
"content": "<|endoftext|>",
|
38 |
+
"lstrip": false,
|
39 |
+
"normalized": false,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
},
|
44 |
+
"151644": {
|
45 |
+
"content": "<|im_start|>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false,
|
50 |
+
"special": true
|
51 |
+
},
|
52 |
+
"151645": {
|
53 |
+
"content": "<|im_end|>",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": false,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false,
|
58 |
+
"special": true
|
59 |
+
}
|
60 |
+
},
|
61 |
+
"additional_special_tokens": [
|
62 |
+
"<|im_start|>",
|
63 |
+
"<|im_end|>"
|
64 |
+
],
|
65 |
+
"bos_token": null,
|
66 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are BanglaQwen, developed by AI4BD. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within /************************************************************************//************************************************************************ XML tags:\\n/************************************************************************\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n//************************************************************************<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are BanglaQwen, developed by AI4BD. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n/************************************************************************\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n//************************************************************************' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n%%%%%%%%%%%%%%%%\\n' }}\n {{- message.content }}\n {{- '\\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
67 |
+
"clean_up_tokenization_spaces": false,
|
68 |
+
"eos_token": "<|im_end|>",
|
69 |
+
"errors": "replace",
|
70 |
+
"extra_special_tokens": {},
|
71 |
+
"model_max_length": 4096,
|
72 |
+
"pad_token": "<|endoftext|>",
|
73 |
+
"padding_side": "right",
|
74 |
+
"split_special_tokens": false,
|
75 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
76 |
+
"unk_token": null
|
77 |
+
}
|
trainer_log.jsonl
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"current_steps": 1, "total_steps": 4354, "loss": 1.3804, "lr": 1.0000000000000002e-06, "epoch": 0.0004592000734720118, "percentage": 0.02, "elapsed_time": "0:00:20", "remaining_time": "1 day, 0:57:58"}
|
2 |
+
{"current_steps": 2, "total_steps": 4354, "loss": 1.3895, "lr": 2.0000000000000003e-06, "epoch": 0.0009184001469440236, "percentage": 0.05, "elapsed_time": "0:00:27", "remaining_time": "16:49:38"}
|
3 |
+
{"current_steps": 3, "total_steps": 4354, "loss": 1.2751, "lr": 3e-06, "epoch": 0.0013776002204160353, "percentage": 0.07, "elapsed_time": "0:00:31", "remaining_time": "12:49:11"}
|
4 |
+
{"current_steps": 4, "total_steps": 4354, "loss": 1.3427, "lr": 4.000000000000001e-06, "epoch": 0.0018368002938880471, "percentage": 0.09, "elapsed_time": "0:00:35", "remaining_time": "10:44:28"}
|
5 |
+
{"current_steps": 5, "total_steps": 4354, "loss": 1.3587, "lr": 5e-06, "epoch": 0.002296000367360059, "percentage": 0.11, "elapsed_time": "0:00:39", "remaining_time": "9:28:12"}
|
6 |
+
{"current_steps": 6, "total_steps": 4354, "loss": 1.3953, "lr": 6e-06, "epoch": 0.0027552004408320707, "percentage": 0.14, "elapsed_time": "0:00:42", "remaining_time": "8:37:41"}
|
7 |
+
{"current_steps": 7, "total_steps": 4354, "loss": 1.4018, "lr": 7.000000000000001e-06, "epoch": 0.0032144005143040825, "percentage": 0.16, "elapsed_time": "0:00:46", "remaining_time": "7:59:36"}
|
8 |
+
{"current_steps": 8, "total_steps": 4354, "loss": 1.4032, "lr": 8.000000000000001e-06, "epoch": 0.0036736005877760942, "percentage": 0.18, "elapsed_time": "0:00:49", "remaining_time": "7:31:11"}
|
9 |
+
{"current_steps": 9, "total_steps": 4354, "loss": 1.4232, "lr": 9e-06, "epoch": 0.004132800661248106, "percentage": 0.21, "elapsed_time": "0:00:53", "remaining_time": "7:10:58"}
|
10 |
+
{"current_steps": 10, "total_steps": 4354, "loss": 1.2924, "lr": 1e-05, "epoch": 0.004592000734720118, "percentage": 0.23, "elapsed_time": "0:00:57", "remaining_time": "6:54:30"}
|
11 |
+
{"current_steps": 11, "total_steps": 4354, "loss": 1.3079, "lr": 1.1000000000000001e-05, "epoch": 0.005051200808192129, "percentage": 0.25, "elapsed_time": "0:01:00", "remaining_time": "6:40:40"}
|
12 |
+
{"current_steps": 12, "total_steps": 4354, "loss": 1.2329, "lr": 1.2e-05, "epoch": 0.005510400881664141, "percentage": 0.28, "elapsed_time": "0:01:04", "remaining_time": "6:28:55"}
|
13 |
+
{"current_steps": 13, "total_steps": 4354, "loss": 1.309, "lr": 1.3000000000000001e-05, "epoch": 0.005969600955136153, "percentage": 0.3, "elapsed_time": "0:01:08", "remaining_time": "6:19:46"}
|
14 |
+
{"current_steps": 14, "total_steps": 4354, "loss": 1.4139, "lr": 1.4000000000000001e-05, "epoch": 0.006428801028608165, "percentage": 0.32, "elapsed_time": "0:01:12", "remaining_time": "6:12:14"}
|
15 |
+
{"current_steps": 15, "total_steps": 4354, "loss": 1.3265, "lr": 1.5e-05, "epoch": 0.006888001102080176, "percentage": 0.34, "elapsed_time": "0:01:15", "remaining_time": "6:04:32"}
|
16 |
+
{"current_steps": 16, "total_steps": 4354, "loss": 1.3244, "lr": 1.6000000000000003e-05, "epoch": 0.0073472011755521885, "percentage": 0.37, "elapsed_time": "0:01:19", "remaining_time": "5:59:04"}
|
17 |
+
{"current_steps": 17, "total_steps": 4354, "loss": 1.4398, "lr": 1.7000000000000003e-05, "epoch": 0.0078064012490242, "percentage": 0.39, "elapsed_time": "0:01:23", "remaining_time": "5:53:19"}
|
18 |
+
{"current_steps": 18, "total_steps": 4354, "loss": 1.3425, "lr": 1.8e-05, "epoch": 0.008265601322496211, "percentage": 0.41, "elapsed_time": "0:01:26", "remaining_time": "5:48:28"}
|
19 |
+
{"current_steps": 19, "total_steps": 4354, "loss": 1.3846, "lr": 1.9e-05, "epoch": 0.008724801395968223, "percentage": 0.44, "elapsed_time": "0:01:30", "remaining_time": "5:44:19"}
|
20 |
+
{"current_steps": 20, "total_steps": 4354, "loss": 1.3338, "lr": 2e-05, "epoch": 0.009184001469440236, "percentage": 0.46, "elapsed_time": "0:01:34", "remaining_time": "5:39:51"}
|
21 |
+
{"current_steps": 21, "total_steps": 4354, "loss": 1.3299, "lr": 2.1e-05, "epoch": 0.009643201542912247, "percentage": 0.48, "elapsed_time": "0:01:37", "remaining_time": "5:36:15"}
|
22 |
+
{"current_steps": 22, "total_steps": 4354, "loss": 1.3151, "lr": 2.2000000000000003e-05, "epoch": 0.010102401616384258, "percentage": 0.51, "elapsed_time": "0:01:41", "remaining_time": "5:33:02"}
|
23 |
+
{"current_steps": 23, "total_steps": 4354, "loss": 1.2471, "lr": 2.3000000000000003e-05, "epoch": 0.01056160168985627, "percentage": 0.53, "elapsed_time": "0:01:45", "remaining_time": "5:30:52"}
|
24 |
+
{"current_steps": 24, "total_steps": 4354, "loss": 1.2738, "lr": 2.4e-05, "epoch": 0.011020801763328283, "percentage": 0.55, "elapsed_time": "0:01:49", "remaining_time": "5:27:54"}
|
25 |
+
{"current_steps": 25, "total_steps": 4354, "loss": 1.4082, "lr": 2.5e-05, "epoch": 0.011480001836800294, "percentage": 0.57, "elapsed_time": "0:01:52", "remaining_time": "5:25:14"}
|
26 |
+
{"current_steps": 26, "total_steps": 4354, "loss": 1.2626, "lr": 2.6000000000000002e-05, "epoch": 0.011939201910272305, "percentage": 0.6, "elapsed_time": "0:01:56", "remaining_time": "5:23:28"}
|
27 |
+
{"current_steps": 27, "total_steps": 4354, "loss": 1.3043, "lr": 2.7000000000000002e-05, "epoch": 0.012398401983744317, "percentage": 0.62, "elapsed_time": "0:02:00", "remaining_time": "5:21:20"}
|
28 |
+
{"current_steps": 28, "total_steps": 4354, "loss": 1.3183, "lr": 2.8000000000000003e-05, "epoch": 0.01285760205721633, "percentage": 0.64, "elapsed_time": "0:02:04", "remaining_time": "5:19:24"}
|
29 |
+
{"current_steps": 29, "total_steps": 4354, "loss": 1.3234, "lr": 2.9e-05, "epoch": 0.013316802130688341, "percentage": 0.67, "elapsed_time": "0:02:07", "remaining_time": "5:17:22"}
|
30 |
+
{"current_steps": 30, "total_steps": 4354, "loss": 1.2934, "lr": 3e-05, "epoch": 0.013776002204160353, "percentage": 0.69, "elapsed_time": "0:02:11", "remaining_time": "5:15:39"}
|
31 |
+
{"current_steps": 31, "total_steps": 4354, "loss": 1.32, "lr": 3.1e-05, "epoch": 0.014235202277632364, "percentage": 0.71, "elapsed_time": "0:02:15", "remaining_time": "5:14:11"}
|
32 |
+
{"current_steps": 32, "total_steps": 4354, "loss": 1.3079, "lr": 3.2000000000000005e-05, "epoch": 0.014694402351104377, "percentage": 0.73, "elapsed_time": "0:02:18", "remaining_time": "5:12:45"}
|
33 |
+
{"current_steps": 33, "total_steps": 4354, "loss": 1.3176, "lr": 3.3e-05, "epoch": 0.015153602424576388, "percentage": 0.76, "elapsed_time": "0:02:22", "remaining_time": "5:11:13"}
|
34 |
+
{"current_steps": 34, "total_steps": 4354, "loss": 1.3174, "lr": 3.4000000000000007e-05, "epoch": 0.0156128024980484, "percentage": 0.78, "elapsed_time": "0:02:26", "remaining_time": "5:09:39"}
|
35 |
+
{"current_steps": 35, "total_steps": 4354, "loss": 1.2815, "lr": 3.5e-05, "epoch": 0.016072002571520413, "percentage": 0.8, "elapsed_time": "0:02:30", "remaining_time": "5:08:39"}
|
36 |
+
{"current_steps": 36, "total_steps": 4354, "loss": 1.297, "lr": 3.6e-05, "epoch": 0.016531202644992422, "percentage": 0.83, "elapsed_time": "0:02:33", "remaining_time": "5:07:46"}
|
37 |
+
{"current_steps": 37, "total_steps": 4354, "loss": 1.3368, "lr": 3.7e-05, "epoch": 0.016990402718464435, "percentage": 0.85, "elapsed_time": "0:02:37", "remaining_time": "5:06:16"}
|
38 |
+
{"current_steps": 38, "total_steps": 4354, "loss": 1.3747, "lr": 3.8e-05, "epoch": 0.017449602791936445, "percentage": 0.87, "elapsed_time": "0:02:41", "remaining_time": "5:05:16"}
|
39 |
+
{"current_steps": 39, "total_steps": 4354, "loss": 1.3036, "lr": 3.9000000000000006e-05, "epoch": 0.017908802865408458, "percentage": 0.9, "elapsed_time": "0:02:45", "remaining_time": "5:04:21"}
|
40 |
+
{"current_steps": 40, "total_steps": 4354, "loss": 1.3682, "lr": 4e-05, "epoch": 0.01836800293888047, "percentage": 0.92, "elapsed_time": "0:02:48", "remaining_time": "5:03:15"}
|
41 |
+
{"current_steps": 41, "total_steps": 4354, "loss": 1.3134, "lr": 4.1e-05, "epoch": 0.01882720301235248, "percentage": 0.94, "elapsed_time": "0:02:54", "remaining_time": "5:05:16"}
|
42 |
+
{"current_steps": 42, "total_steps": 4354, "loss": 1.3683, "lr": 4.2e-05, "epoch": 0.019286403085824494, "percentage": 0.96, "elapsed_time": "0:02:57", "remaining_time": "5:04:12"}
|
43 |
+
{"current_steps": 43, "total_steps": 4354, "loss": 1.3823, "lr": 4.3e-05, "epoch": 0.019745603159296507, "percentage": 0.99, "elapsed_time": "0:03:01", "remaining_time": "5:03:24"}
|
44 |
+
{"current_steps": 44, "total_steps": 4354, "loss": 1.3199, "lr": 4.4000000000000006e-05, "epoch": 0.020204803232768517, "percentage": 1.01, "elapsed_time": "0:03:06", "remaining_time": "5:03:43"}
|
45 |
+
{"current_steps": 45, "total_steps": 4354, "loss": 1.3567, "lr": 4.5e-05, "epoch": 0.02066400330624053, "percentage": 1.03, "elapsed_time": "0:03:09", "remaining_time": "5:03:09"}
|
46 |
+
{"current_steps": 46, "total_steps": 4354, "loss": 1.4144, "lr": 4.600000000000001e-05, "epoch": 0.02112320337971254, "percentage": 1.06, "elapsed_time": "0:03:13", "remaining_time": "5:02:27"}
|
47 |
+
{"current_steps": 47, "total_steps": 4354, "loss": 1.2648, "lr": 4.7e-05, "epoch": 0.021582403453184552, "percentage": 1.08, "elapsed_time": "0:03:17", "remaining_time": "5:01:58"}
|
48 |
+
{"current_steps": 48, "total_steps": 4354, "loss": 1.4216, "lr": 4.8e-05, "epoch": 0.022041603526656565, "percentage": 1.1, "elapsed_time": "0:03:21", "remaining_time": "5:01:19"}
|
49 |
+
{"current_steps": 49, "total_steps": 4354, "loss": 1.3791, "lr": 4.9e-05, "epoch": 0.022500803600128575, "percentage": 1.13, "elapsed_time": "0:03:25", "remaining_time": "5:00:29"}
|
50 |
+
{"current_steps": 50, "total_steps": 4354, "loss": 1.3768, "lr": 5e-05, "epoch": 0.022960003673600588, "percentage": 1.15, "elapsed_time": "0:03:29", "remaining_time": "4:59:56"}
|
51 |
+
{"current_steps": 51, "total_steps": 4354, "loss": 1.3168, "lr": 4.9999993340138253e-05, "epoch": 0.0234192037470726, "percentage": 1.17, "elapsed_time": "0:03:32", "remaining_time": "4:59:20"}
|
52 |
+
{"current_steps": 52, "total_steps": 4354, "loss": 1.3193, "lr": 4.999997336055656e-05, "epoch": 0.02387840382054461, "percentage": 1.19, "elapsed_time": "0:03:36", "remaining_time": "4:58:30"}
|
53 |
+
{"current_steps": 53, "total_steps": 4354, "loss": 1.3256, "lr": 4.999994006126555e-05, "epoch": 0.024337603894016624, "percentage": 1.22, "elapsed_time": "0:03:40", "remaining_time": "4:58:44"}
|
54 |
+
{"current_steps": 54, "total_steps": 4354, "loss": 1.3745, "lr": 4.9999893442282986e-05, "epoch": 0.024796803967488634, "percentage": 1.24, "elapsed_time": "0:03:44", "remaining_time": "4:57:54"}
|
55 |
+
{"current_steps": 55, "total_steps": 4354, "loss": 1.3414, "lr": 4.99998335036337e-05, "epoch": 0.025256004040960647, "percentage": 1.26, "elapsed_time": "0:03:48", "remaining_time": "4:57:15"}
|
56 |
+
{"current_steps": 56, "total_steps": 4354, "loss": 1.3981, "lr": 4.999976024534962e-05, "epoch": 0.02571520411443266, "percentage": 1.29, "elapsed_time": "0:03:52", "remaining_time": "4:56:47"}
|
57 |
+
{"current_steps": 57, "total_steps": 4354, "loss": 1.4594, "lr": 4.9999673667469783e-05, "epoch": 0.02617440418790467, "percentage": 1.31, "elapsed_time": "0:03:55", "remaining_time": "4:56:10"}
|
58 |
+
{"current_steps": 58, "total_steps": 4354, "loss": 1.3603, "lr": 4.999957377004031e-05, "epoch": 0.026633604261376682, "percentage": 1.33, "elapsed_time": "0:03:59", "remaining_time": "4:55:40"}
|
59 |
+
{"current_steps": 59, "total_steps": 4354, "loss": 1.3368, "lr": 4.999946055311444e-05, "epoch": 0.027092804334848692, "percentage": 1.36, "elapsed_time": "0:04:03", "remaining_time": "4:55:03"}
|
60 |
+
{"current_steps": 60, "total_steps": 4354, "loss": 1.3206, "lr": 4.9999334016752476e-05, "epoch": 0.027552004408320705, "percentage": 1.38, "elapsed_time": "0:04:07", "remaining_time": "4:54:43"}
|
61 |
+
{"current_steps": 61, "total_steps": 4354, "loss": 1.3657, "lr": 4.999919416102184e-05, "epoch": 0.028011204481792718, "percentage": 1.4, "elapsed_time": "0:04:10", "remaining_time": "4:54:03"}
|
62 |
+
{"current_steps": 62, "total_steps": 4354, "loss": 1.4203, "lr": 4.9999040985997054e-05, "epoch": 0.028470404555264728, "percentage": 1.42, "elapsed_time": "0:04:14", "remaining_time": "4:53:27"}
|
63 |
+
{"current_steps": 63, "total_steps": 4354, "loss": 1.3436, "lr": 4.999887449175972e-05, "epoch": 0.02892960462873674, "percentage": 1.45, "elapsed_time": "0:04:18", "remaining_time": "4:52:58"}
|
64 |
+
{"current_steps": 64, "total_steps": 4354, "loss": 1.3111, "lr": 4.999869467839855e-05, "epoch": 0.029388804702208754, "percentage": 1.47, "elapsed_time": "0:04:21", "remaining_time": "4:52:36"}
|
65 |
+
{"current_steps": 65, "total_steps": 4354, "loss": 1.3866, "lr": 4.999850154600934e-05, "epoch": 0.029848004775680764, "percentage": 1.49, "elapsed_time": "0:04:25", "remaining_time": "4:52:06"}
|
66 |
+
{"current_steps": 66, "total_steps": 4354, "loss": 1.2843, "lr": 4.999829509469499e-05, "epoch": 0.030307204849152777, "percentage": 1.52, "elapsed_time": "0:04:29", "remaining_time": "4:51:39"}
|
67 |
+
{"current_steps": 67, "total_steps": 4354, "loss": 1.3595, "lr": 4.9998075324565505e-05, "epoch": 0.030766404922624786, "percentage": 1.54, "elapsed_time": "0:04:33", "remaining_time": "4:51:21"}
|
68 |
+
{"current_steps": 68, "total_steps": 4354, "loss": 1.3464, "lr": 4.999784223573797e-05, "epoch": 0.0312256049960968, "percentage": 1.56, "elapsed_time": "0:04:37", "remaining_time": "4:50:59"}
|
69 |
+
{"current_steps": 69, "total_steps": 4354, "loss": 1.3117, "lr": 4.999759582833656e-05, "epoch": 0.03168480506956881, "percentage": 1.58, "elapsed_time": "0:04:40", "remaining_time": "4:50:35"}
|
70 |
+
{"current_steps": 70, "total_steps": 4354, "loss": 1.4115, "lr": 4.9997336102492574e-05, "epoch": 0.032144005143040826, "percentage": 1.61, "elapsed_time": "0:04:44", "remaining_time": "4:50:11"}
|
71 |
+
{"current_steps": 71, "total_steps": 4354, "loss": 1.3594, "lr": 4.999706305834438e-05, "epoch": 0.03260320521651283, "percentage": 1.63, "elapsed_time": "0:04:48", "remaining_time": "4:49:53"}
|
72 |
+
{"current_steps": 72, "total_steps": 4354, "loss": 1.3219, "lr": 4.9996776696037476e-05, "epoch": 0.033062405289984845, "percentage": 1.65, "elapsed_time": "0:04:52", "remaining_time": "4:49:54"}
|
73 |
+
{"current_steps": 73, "total_steps": 4354, "loss": 1.418, "lr": 4.99964770157244e-05, "epoch": 0.03352160536345686, "percentage": 1.68, "elapsed_time": "0:04:56", "remaining_time": "4:49:35"}
|
74 |
+
{"current_steps": 74, "total_steps": 4354, "loss": 1.2797, "lr": 4.9996164017564837e-05, "epoch": 0.03398080543692887, "percentage": 1.7, "elapsed_time": "0:05:00", "remaining_time": "4:49:15"}
|
75 |
+
{"current_steps": 75, "total_steps": 4354, "loss": 1.2866, "lr": 4.9995837701725544e-05, "epoch": 0.034440005510400884, "percentage": 1.72, "elapsed_time": "0:05:04", "remaining_time": "4:49:17"}
|
76 |
+
{"current_steps": 76, "total_steps": 4354, "loss": 1.3651, "lr": 4.9995498068380374e-05, "epoch": 0.03489920558387289, "percentage": 1.75, "elapsed_time": "0:05:07", "remaining_time": "4:48:52"}
|
77 |
+
{"current_steps": 77, "total_steps": 4354, "loss": 1.3065, "lr": 4.999514511771029e-05, "epoch": 0.0353584056573449, "percentage": 1.77, "elapsed_time": "0:05:11", "remaining_time": "4:48:25"}
|
78 |
+
{"current_steps": 78, "total_steps": 4354, "loss": 1.4687, "lr": 4.999477884990334e-05, "epoch": 0.035817605730816916, "percentage": 1.79, "elapsed_time": "0:05:15", "remaining_time": "4:47:57"}
|
79 |
+
{"current_steps": 79, "total_steps": 4354, "loss": 1.3369, "lr": 4.9994399265154666e-05, "epoch": 0.03627680580428893, "percentage": 1.81, "elapsed_time": "0:05:19", "remaining_time": "4:47:56"}
|
80 |
+
{"current_steps": 80, "total_steps": 4354, "loss": 1.3576, "lr": 4.99940063636665e-05, "epoch": 0.03673600587776094, "percentage": 1.84, "elapsed_time": "0:05:23", "remaining_time": "4:47:48"}
|
81 |
+
{"current_steps": 81, "total_steps": 4354, "loss": 1.4062, "lr": 4.999360014564819e-05, "epoch": 0.037195205951232956, "percentage": 1.86, "elapsed_time": "0:05:26", "remaining_time": "4:47:22"}
|
82 |
+
{"current_steps": 82, "total_steps": 4354, "loss": 1.3121, "lr": 4.999318061131614e-05, "epoch": 0.03765440602470496, "percentage": 1.88, "elapsed_time": "0:05:30", "remaining_time": "4:47:03"}
|
83 |
+
{"current_steps": 83, "total_steps": 4354, "loss": 1.3783, "lr": 4.999274776089389e-05, "epoch": 0.038113606098176975, "percentage": 1.91, "elapsed_time": "0:05:34", "remaining_time": "4:46:36"}
|
84 |
+
{"current_steps": 84, "total_steps": 4354, "loss": 1.3433, "lr": 4.9992301594612055e-05, "epoch": 0.03857280617164899, "percentage": 1.93, "elapsed_time": "0:05:37", "remaining_time": "4:46:09"}
|
85 |
+
{"current_steps": 85, "total_steps": 4354, "loss": 1.4302, "lr": 4.999184211270835e-05, "epoch": 0.039032006245121, "percentage": 1.95, "elapsed_time": "0:05:41", "remaining_time": "4:45:52"}
|
86 |
+
{"current_steps": 86, "total_steps": 4354, "loss": 1.3039, "lr": 4.999136931542758e-05, "epoch": 0.039491206318593014, "percentage": 1.98, "elapsed_time": "0:05:45", "remaining_time": "4:45:40"}
|
87 |
+
{"current_steps": 87, "total_steps": 4354, "loss": 1.3584, "lr": 4.999088320302166e-05, "epoch": 0.03995040639206502, "percentage": 2.0, "elapsed_time": "0:05:49", "remaining_time": "4:45:45"}
|
88 |
+
{"current_steps": 88, "total_steps": 4354, "loss": 1.2915, "lr": 4.999038377574955e-05, "epoch": 0.04040960646553703, "percentage": 2.02, "elapsed_time": "0:05:53", "remaining_time": "4:45:24"}
|
89 |
+
{"current_steps": 89, "total_steps": 4354, "loss": 1.4106, "lr": 4.998987103387737e-05, "epoch": 0.040868806539009046, "percentage": 2.04, "elapsed_time": "0:05:56", "remaining_time": "4:45:02"}
|
90 |
+
{"current_steps": 90, "total_steps": 4354, "loss": 1.3344, "lr": 4.9989344977678285e-05, "epoch": 0.04132800661248106, "percentage": 2.07, "elapsed_time": "0:06:00", "remaining_time": "4:44:43"}
|
91 |
+
{"current_steps": 91, "total_steps": 4354, "loss": 1.2863, "lr": 4.998880560743259e-05, "epoch": 0.04178720668595307, "percentage": 2.09, "elapsed_time": "0:06:04", "remaining_time": "4:44:27"}
|
92 |
+
{"current_steps": 92, "total_steps": 4354, "loss": 1.3271, "lr": 4.9988252923427634e-05, "epoch": 0.04224640675942508, "percentage": 2.11, "elapsed_time": "0:06:08", "remaining_time": "4:44:13"}
|
93 |
+
{"current_steps": 93, "total_steps": 4354, "loss": 1.3344, "lr": 4.998768692595791e-05, "epoch": 0.04270560683289709, "percentage": 2.14, "elapsed_time": "0:06:11", "remaining_time": "4:43:56"}
|
94 |
+
{"current_steps": 94, "total_steps": 4354, "loss": 1.2922, "lr": 4.9987107615324944e-05, "epoch": 0.043164806906369105, "percentage": 2.16, "elapsed_time": "0:06:15", "remaining_time": "4:43:41"}
|
95 |
+
{"current_steps": 95, "total_steps": 4354, "loss": 1.4438, "lr": 4.998651499183741e-05, "epoch": 0.04362400697984112, "percentage": 2.18, "elapsed_time": "0:06:19", "remaining_time": "4:43:24"}
|
96 |
+
{"current_steps": 96, "total_steps": 4354, "loss": 1.3937, "lr": 4.998590905581104e-05, "epoch": 0.04408320705331313, "percentage": 2.2, "elapsed_time": "0:06:23", "remaining_time": "4:43:15"}
|
97 |
+
{"current_steps": 97, "total_steps": 4354, "loss": 1.3546, "lr": 4.9985289807568666e-05, "epoch": 0.04454240712678514, "percentage": 2.23, "elapsed_time": "0:06:26", "remaining_time": "4:42:58"}
|
98 |
+
{"current_steps": 98, "total_steps": 4354, "loss": 1.4719, "lr": 4.998465724744022e-05, "epoch": 0.04500160720025715, "percentage": 2.25, "elapsed_time": "0:06:30", "remaining_time": "4:42:43"}
|
99 |
+
{"current_steps": 99, "total_steps": 4354, "loss": 1.3202, "lr": 4.998401137576274e-05, "epoch": 0.04546080727372916, "percentage": 2.27, "elapsed_time": "0:06:34", "remaining_time": "4:42:33"}
|
100 |
+
{"current_steps": 100, "total_steps": 4354, "loss": 1.3113, "lr": 4.998335219288032e-05, "epoch": 0.045920007347201176, "percentage": 2.3, "elapsed_time": "0:06:38", "remaining_time": "4:42:15"}
|
101 |
+
{"current_steps": 101, "total_steps": 4354, "loss": 1.4094, "lr": 4.998267969914417e-05, "epoch": 0.04637920742067319, "percentage": 2.32, "elapsed_time": "0:06:41", "remaining_time": "4:41:59"}
|
102 |
+
{"current_steps": 102, "total_steps": 4354, "loss": 1.3257, "lr": 4.998199389491257e-05, "epoch": 0.0468384074941452, "percentage": 2.34, "elapsed_time": "0:06:45", "remaining_time": "4:41:47"}
|
103 |
+
{"current_steps": 103, "total_steps": 4354, "loss": 1.2924, "lr": 4.998129478055095e-05, "epoch": 0.04729760756761721, "percentage": 2.37, "elapsed_time": "0:06:49", "remaining_time": "4:41:37"}
|
104 |
+
{"current_steps": 104, "total_steps": 4354, "loss": 1.4036, "lr": 4.998058235643175e-05, "epoch": 0.04775680764108922, "percentage": 2.39, "elapsed_time": "0:06:53", "remaining_time": "4:41:18"}
|
105 |
+
{"current_steps": 105, "total_steps": 4354, "loss": 1.2819, "lr": 4.9979856622934564e-05, "epoch": 0.048216007714561235, "percentage": 2.41, "elapsed_time": "0:06:57", "remaining_time": "4:41:19"}
|
106 |
+
{"current_steps": 106, "total_steps": 4354, "loss": 1.3171, "lr": 4.997911758044605e-05, "epoch": 0.04867520778803325, "percentage": 2.43, "elapsed_time": "0:07:00", "remaining_time": "4:41:04"}
|
107 |
+
{"current_steps": 107, "total_steps": 4354, "loss": 1.3574, "lr": 4.997836522935996e-05, "epoch": 0.04913440786150526, "percentage": 2.46, "elapsed_time": "0:07:04", "remaining_time": "4:40:44"}
|
108 |
+
{"current_steps": 108, "total_steps": 4354, "loss": 1.2778, "lr": 4.997759957007714e-05, "epoch": 0.04959360793497727, "percentage": 2.48, "elapsed_time": "0:07:08", "remaining_time": "4:40:51"}
|
109 |
+
{"current_steps": 109, "total_steps": 4354, "loss": 1.3896, "lr": 4.997682060300553e-05, "epoch": 0.05005280800844928, "percentage": 2.5, "elapsed_time": "0:07:12", "remaining_time": "4:40:44"}
|
110 |
+
{"current_steps": 110, "total_steps": 4354, "loss": 1.402, "lr": 4.997602832856013e-05, "epoch": 0.05051200808192129, "percentage": 2.53, "elapsed_time": "0:07:16", "remaining_time": "4:40:40"}
|
111 |
+
{"current_steps": 111, "total_steps": 4354, "loss": 1.3787, "lr": 4.997522274716309e-05, "epoch": 0.050971208155393306, "percentage": 2.55, "elapsed_time": "0:07:20", "remaining_time": "4:40:42"}
|
112 |
+
{"current_steps": 112, "total_steps": 4354, "loss": 1.3412, "lr": 4.997440385924359e-05, "epoch": 0.05143040822886532, "percentage": 2.57, "elapsed_time": "0:07:24", "remaining_time": "4:40:29"}
|
113 |
+
{"current_steps": 113, "total_steps": 4354, "loss": 1.2783, "lr": 4.997357166523794e-05, "epoch": 0.051889608302337326, "percentage": 2.6, "elapsed_time": "0:07:28", "remaining_time": "4:40:35"}
|
114 |
+
{"current_steps": 114, "total_steps": 4354, "loss": 1.3426, "lr": 4.9972726165589515e-05, "epoch": 0.05234880837580934, "percentage": 2.62, "elapsed_time": "0:07:32", "remaining_time": "4:40:17"}
|
115 |
+
{"current_steps": 115, "total_steps": 4354, "loss": 1.3072, "lr": 4.997186736074879e-05, "epoch": 0.05280800844928135, "percentage": 2.64, "elapsed_time": "0:07:35", "remaining_time": "4:40:04"}
|
116 |
+
{"current_steps": 116, "total_steps": 4354, "loss": 1.3492, "lr": 4.997099525117332e-05, "epoch": 0.053267208522753365, "percentage": 2.66, "elapsed_time": "0:07:39", "remaining_time": "4:39:52"}
|
117 |
+
{"current_steps": 117, "total_steps": 4354, "loss": 1.395, "lr": 4.9970109837327775e-05, "epoch": 0.05372640859622538, "percentage": 2.69, "elapsed_time": "0:07:44", "remaining_time": "4:40:30"}
|
118 |
+
{"current_steps": 118, "total_steps": 4354, "loss": 1.4371, "lr": 4.996921111968387e-05, "epoch": 0.054185608669697384, "percentage": 2.71, "elapsed_time": "0:07:48", "remaining_time": "4:40:15"}
|
119 |
+
{"current_steps": 119, "total_steps": 4354, "loss": 1.343, "lr": 4.996829909872045e-05, "epoch": 0.0546448087431694, "percentage": 2.73, "elapsed_time": "0:07:52", "remaining_time": "4:40:01"}
|
120 |
+
{"current_steps": 120, "total_steps": 4354, "loss": 1.2427, "lr": 4.996737377492342e-05, "epoch": 0.05510400881664141, "percentage": 2.76, "elapsed_time": "0:07:56", "remaining_time": "4:40:17"}
|
121 |
+
{"current_steps": 121, "total_steps": 4354, "loss": 1.2891, "lr": 4.996643514878578e-05, "epoch": 0.05556320889011342, "percentage": 2.78, "elapsed_time": "0:08:00", "remaining_time": "4:39:59"}
|
122 |
+
{"current_steps": 122, "total_steps": 4354, "loss": 1.3337, "lr": 4.9965483220807627e-05, "epoch": 0.056022408963585436, "percentage": 2.8, "elapsed_time": "0:08:03", "remaining_time": "4:39:42"}
|
123 |
+
{"current_steps": 123, "total_steps": 4354, "loss": 1.3042, "lr": 4.9964517991496137e-05, "epoch": 0.05648160903705744, "percentage": 2.82, "elapsed_time": "0:08:07", "remaining_time": "4:39:41"}
|
124 |
+
{"current_steps": 124, "total_steps": 4354, "loss": 1.4047, "lr": 4.996353946136557e-05, "epoch": 0.056940809110529456, "percentage": 2.85, "elapsed_time": "0:08:11", "remaining_time": "4:39:26"}
|
125 |
+
{"current_steps": 125, "total_steps": 4354, "loss": 1.317, "lr": 4.9962547630937275e-05, "epoch": 0.05740000918400147, "percentage": 2.87, "elapsed_time": "0:08:15", "remaining_time": "4:39:14"}
|
126 |
+
{"current_steps": 126, "total_steps": 4354, "loss": 1.3812, "lr": 4.996154250073969e-05, "epoch": 0.05785920925747348, "percentage": 2.89, "elapsed_time": "0:08:19", "remaining_time": "4:39:05"}
|
127 |
+
{"current_steps": 127, "total_steps": 4354, "loss": 1.3448, "lr": 4.996052407130833e-05, "epoch": 0.058318409330945495, "percentage": 2.92, "elapsed_time": "0:08:22", "remaining_time": "4:38:59"}
|
128 |
+
{"current_steps": 128, "total_steps": 4354, "loss": 1.2553, "lr": 4.9959492343185834e-05, "epoch": 0.05877760940441751, "percentage": 2.94, "elapsed_time": "0:08:26", "remaining_time": "4:38:45"}
|
129 |
+
{"current_steps": 129, "total_steps": 4354, "loss": 1.2989, "lr": 4.995844731692185e-05, "epoch": 0.059236809477889514, "percentage": 2.96, "elapsed_time": "0:08:30", "remaining_time": "4:38:32"}
|
130 |
+
{"current_steps": 130, "total_steps": 4354, "loss": 1.3442, "lr": 4.995738899307319e-05, "epoch": 0.05969600955136153, "percentage": 2.99, "elapsed_time": "0:08:34", "remaining_time": "4:38:21"}
|
131 |
+
{"current_steps": 131, "total_steps": 4354, "loss": 1.3869, "lr": 4.99563173722037e-05, "epoch": 0.06015520962483354, "percentage": 3.01, "elapsed_time": "0:08:37", "remaining_time": "4:38:16"}
|
132 |
+
{"current_steps": 132, "total_steps": 4354, "loss": 1.4096, "lr": 4.995523245488434e-05, "epoch": 0.06061440969830555, "percentage": 3.03, "elapsed_time": "0:08:41", "remaining_time": "4:38:06"}
|
133 |
+
{"current_steps": 133, "total_steps": 4354, "loss": 1.3141, "lr": 4.995413424169313e-05, "epoch": 0.061073609771777566, "percentage": 3.05, "elapsed_time": "0:08:45", "remaining_time": "4:37:56"}
|
134 |
+
{"current_steps": 134, "total_steps": 4354, "loss": 1.3398, "lr": 4.995302273321519e-05, "epoch": 0.06153280984524957, "percentage": 3.08, "elapsed_time": "0:08:49", "remaining_time": "4:37:54"}
|
135 |
+
{"current_steps": 135, "total_steps": 4354, "loss": 1.3507, "lr": 4.995189793004272e-05, "epoch": 0.061992009918721586, "percentage": 3.1, "elapsed_time": "0:08:53", "remaining_time": "4:37:46"}
|
136 |
+
{"current_steps": 136, "total_steps": 4354, "loss": 1.348, "lr": 4.995075983277501e-05, "epoch": 0.0624512099921936, "percentage": 3.12, "elapsed_time": "0:08:57", "remaining_time": "4:37:43"}
|
137 |
+
{"current_steps": 137, "total_steps": 4354, "loss": 1.3355, "lr": 4.994960844201841e-05, "epoch": 0.06291041006566561, "percentage": 3.15, "elapsed_time": "0:09:01", "remaining_time": "4:37:46"}
|
138 |
+
{"current_steps": 138, "total_steps": 4354, "loss": 1.4539, "lr": 4.9948443758386384e-05, "epoch": 0.06336961013913762, "percentage": 3.17, "elapsed_time": "0:09:04", "remaining_time": "4:37:29"}
|
139 |
+
{"current_steps": 139, "total_steps": 4354, "loss": 1.3626, "lr": 4.994726578249946e-05, "epoch": 0.06382881021260964, "percentage": 3.19, "elapsed_time": "0:09:08", "remaining_time": "4:37:24"}
|
140 |
+
{"current_steps": 140, "total_steps": 4354, "loss": 1.3372, "lr": 4.994607451498524e-05, "epoch": 0.06428801028608165, "percentage": 3.22, "elapsed_time": "0:09:12", "remaining_time": "4:37:15"}
|
141 |
+
{"current_steps": 141, "total_steps": 4354, "loss": 1.3572, "lr": 4.9944869956478425e-05, "epoch": 0.06474721035955366, "percentage": 3.24, "elapsed_time": "0:09:16", "remaining_time": "4:37:06"}
|
142 |
+
{"current_steps": 142, "total_steps": 4354, "loss": 1.2921, "lr": 4.994365210762079e-05, "epoch": 0.06520641043302566, "percentage": 3.26, "elapsed_time": "0:09:20", "remaining_time": "4:36:52"}
|
143 |
+
{"current_steps": 143, "total_steps": 4354, "loss": 1.3875, "lr": 4.9942420969061196e-05, "epoch": 0.06566561050649768, "percentage": 3.28, "elapsed_time": "0:09:23", "remaining_time": "4:36:38"}
|
144 |
+
{"current_steps": 144, "total_steps": 4354, "loss": 1.3841, "lr": 4.994117654145557e-05, "epoch": 0.06612481057996969, "percentage": 3.31, "elapsed_time": "0:09:27", "remaining_time": "4:36:24"}
|
145 |
+
{"current_steps": 145, "total_steps": 4354, "loss": 1.3194, "lr": 4.9939918825466935e-05, "epoch": 0.0665840106534417, "percentage": 3.33, "elapsed_time": "0:09:30", "remaining_time": "4:36:13"}
|
146 |
+
{"current_steps": 146, "total_steps": 4354, "loss": 1.3875, "lr": 4.993864782176539e-05, "epoch": 0.06704321072691372, "percentage": 3.35, "elapsed_time": "0:09:34", "remaining_time": "4:36:05"}
|
147 |
+
{"current_steps": 147, "total_steps": 4354, "loss": 1.3994, "lr": 4.99373635310281e-05, "epoch": 0.06750241080038573, "percentage": 3.38, "elapsed_time": "0:09:38", "remaining_time": "4:36:00"}
|
148 |
+
{"current_steps": 148, "total_steps": 4354, "loss": 1.2841, "lr": 4.993606595393935e-05, "epoch": 0.06796161087385774, "percentage": 3.4, "elapsed_time": "0:09:42", "remaining_time": "4:35:52"}
|
149 |
+
{"current_steps": 149, "total_steps": 4354, "loss": 1.3448, "lr": 4.993475509119044e-05, "epoch": 0.06842081094732975, "percentage": 3.42, "elapsed_time": "0:09:46", "remaining_time": "4:35:43"}
|
150 |
+
{"current_steps": 150, "total_steps": 4354, "loss": 1.4078, "lr": 4.99334309434798e-05, "epoch": 0.06888001102080177, "percentage": 3.45, "elapsed_time": "0:09:49", "remaining_time": "4:35:31"}
|
151 |
+
{"current_steps": 151, "total_steps": 4354, "loss": 1.3237, "lr": 4.993209351151292e-05, "epoch": 0.06933921109427378, "percentage": 3.47, "elapsed_time": "0:10:00", "remaining_time": "4:38:27"}
|
152 |
+
{"current_steps": 152, "total_steps": 4354, "loss": 1.2682, "lr": 4.993074279600237e-05, "epoch": 0.06979841116774578, "percentage": 3.49, "elapsed_time": "0:10:03", "remaining_time": "4:38:14"}
|
153 |
+
{"current_steps": 153, "total_steps": 4354, "loss": 1.3408, "lr": 4.9929378797667796e-05, "epoch": 0.0702576112412178, "percentage": 3.51, "elapsed_time": "0:10:07", "remaining_time": "4:38:08"}
|
154 |
+
{"current_steps": 154, "total_steps": 4354, "loss": 1.3021, "lr": 4.992800151723592e-05, "epoch": 0.0707168113146898, "percentage": 3.54, "elapsed_time": "0:10:11", "remaining_time": "4:37:58"}
|
155 |
+
{"current_steps": 155, "total_steps": 4354, "loss": 1.3997, "lr": 4.9926610955440545e-05, "epoch": 0.07117601138816182, "percentage": 3.56, "elapsed_time": "0:10:15", "remaining_time": "4:37:44"}
|
156 |
+
{"current_steps": 156, "total_steps": 4354, "loss": 1.269, "lr": 4.992520711302254e-05, "epoch": 0.07163521146163383, "percentage": 3.58, "elapsed_time": "0:10:19", "remaining_time": "4:37:37"}
|
157 |
+
{"current_steps": 157, "total_steps": 4354, "loss": 1.378, "lr": 4.992378999072986e-05, "epoch": 0.07209441153510585, "percentage": 3.61, "elapsed_time": "0:10:22", "remaining_time": "4:37:33"}
|
158 |
+
{"current_steps": 158, "total_steps": 4354, "loss": 1.3742, "lr": 4.992235958931753e-05, "epoch": 0.07255361160857786, "percentage": 3.63, "elapsed_time": "0:10:26", "remaining_time": "4:37:27"}
|
159 |
+
{"current_steps": 159, "total_steps": 4354, "loss": 1.3491, "lr": 4.992091590954766e-05, "epoch": 0.07301281168204987, "percentage": 3.65, "elapsed_time": "0:10:30", "remaining_time": "4:37:21"}
|
160 |
+
{"current_steps": 160, "total_steps": 4354, "loss": 1.3206, "lr": 4.991945895218941e-05, "epoch": 0.07347201175552188, "percentage": 3.67, "elapsed_time": "0:10:35", "remaining_time": "4:37:30"}
|
161 |
+
{"current_steps": 161, "total_steps": 4354, "loss": 1.3445, "lr": 4.991798871801906e-05, "epoch": 0.0739312118289939, "percentage": 3.7, "elapsed_time": "0:10:38", "remaining_time": "4:37:19"}
|
162 |
+
{"current_steps": 162, "total_steps": 4354, "loss": 1.3591, "lr": 4.99165052078199e-05, "epoch": 0.07439041190246591, "percentage": 3.72, "elapsed_time": "0:10:42", "remaining_time": "4:37:11"}
|
163 |
+
{"current_steps": 163, "total_steps": 4354, "loss": 1.3792, "lr": 4.991500842238235e-05, "epoch": 0.07484961197593791, "percentage": 3.74, "elapsed_time": "0:10:46", "remaining_time": "4:37:04"}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7126ebeb2b705efb00b4d663513899e51c325af64373b542f9b6a38cd8b1597
|
3 |
+
size 7608
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|