Training in progress, epoch 3, checkpoint
Browse files- last-checkpoint/global_step1872/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1872/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1872/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1872/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +964 -3
last-checkpoint/global_step1872/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62c1b0737d9ef833cc5447393be9645063199f9c62e660078c52e845aa32896b
|
3 |
+
size 30462473157
|
last-checkpoint/global_step1872/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:817359d4e1fee825f428c471135c1a7e867c2eed7ed945e8a13b0997f00d4daa
|
3 |
+
size 30462473157
|
last-checkpoint/global_step1872/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f20044a320dfaceff8d5f009e24b0c20b18aa9918609113a30e834ccbce7a2a
|
3 |
+
size 168021
|
last-checkpoint/global_step1872/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4c4a2977f7897858d681153444d2ebd90e00463a70a1038df61619876f7f93b
|
3 |
+
size 168021
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1872
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80fcbf5f63a031db5e7b7cc0059d8610275f60c9994a6affd53349d73d62303d
|
3 |
size 4877660776
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b40d8045439d7259e9b7354ad01954826a7161688838b7467a7e317daee489fd
|
3 |
size 4932751008
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f96435a411a27508b63821508e0f9ac352278204b2c94d715d3f47b37f0c7e59
|
3 |
size 4330865200
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1cad27ebfd179a88097c9008af3e226adae1d17acab641c6855cd3bc85c3d7d
|
3 |
size 1089994880
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14768
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95430508d31bbe1a66a940e2572bf04addefae3e1c4e861e8657f66d302aa23e
|
3 |
size 14768
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14768
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b70a6983e6085768b4d2b447a8bd35374bce9cf4ea8e8fbefc1260ca2e054a70
|
3 |
size 14768
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b15b652e34e702b048a2fe65a8149c25795b8f29765f41806646e987f007a10
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1914,6 +1914,967 @@
|
|
1914 |
"eval_samples_per_second": 6.892,
|
1915 |
"eval_steps_per_second": 0.448,
|
1916 |
"step": 1248
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1917 |
}
|
1918 |
],
|
1919 |
"logging_steps": 10,
|
@@ -1928,7 +2889,7 @@
|
|
1928 |
"should_evaluate": false,
|
1929 |
"should_log": false,
|
1930 |
"should_save": true,
|
1931 |
-
"should_training_stop":
|
1932 |
},
|
1933 |
"attributes": {}
|
1934 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1872,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1914 |
"eval_samples_per_second": 6.892,
|
1915 |
"eval_steps_per_second": 0.448,
|
1916 |
"step": 1248
|
1917 |
+
},
|
1918 |
+
{
|
1919 |
+
"epoch": 2.003205128205128,
|
1920 |
+
"grad_norm": 2.3679308142140885,
|
1921 |
+
"learning_rate": 1.846793349168646e-07,
|
1922 |
+
"logits/chosen": -0.341796875,
|
1923 |
+
"logits/rejected": -0.46484375,
|
1924 |
+
"logps/chosen": -366.0,
|
1925 |
+
"logps/rejected": -352.0,
|
1926 |
+
"loss": 0.0315,
|
1927 |
+
"rewards/accuracies": 0.987500011920929,
|
1928 |
+
"rewards/chosen": 2.609375,
|
1929 |
+
"rewards/margins": 6.9375,
|
1930 |
+
"rewards/rejected": -4.3125,
|
1931 |
+
"step": 1250
|
1932 |
+
},
|
1933 |
+
{
|
1934 |
+
"epoch": 2.019230769230769,
|
1935 |
+
"grad_norm": 2.590719502204235,
|
1936 |
+
"learning_rate": 1.8171021377672207e-07,
|
1937 |
+
"logits/chosen": -0.25,
|
1938 |
+
"logits/rejected": -0.404296875,
|
1939 |
+
"logps/chosen": -378.0,
|
1940 |
+
"logps/rejected": -364.0,
|
1941 |
+
"loss": 0.0081,
|
1942 |
+
"rewards/accuracies": 1.0,
|
1943 |
+
"rewards/chosen": 2.015625,
|
1944 |
+
"rewards/margins": 6.90625,
|
1945 |
+
"rewards/rejected": -4.875,
|
1946 |
+
"step": 1260
|
1947 |
+
},
|
1948 |
+
{
|
1949 |
+
"epoch": 2.03525641025641,
|
1950 |
+
"grad_norm": 22.175507349451703,
|
1951 |
+
"learning_rate": 1.7874109263657958e-07,
|
1952 |
+
"logits/chosen": -0.2275390625,
|
1953 |
+
"logits/rejected": -0.37890625,
|
1954 |
+
"logps/chosen": -364.0,
|
1955 |
+
"logps/rejected": -384.0,
|
1956 |
+
"loss": 0.0156,
|
1957 |
+
"rewards/accuracies": 0.987500011920929,
|
1958 |
+
"rewards/chosen": 2.0625,
|
1959 |
+
"rewards/margins": 7.375,
|
1960 |
+
"rewards/rejected": -5.3125,
|
1961 |
+
"step": 1270
|
1962 |
+
},
|
1963 |
+
{
|
1964 |
+
"epoch": 2.051282051282051,
|
1965 |
+
"grad_norm": 4.568251724068892,
|
1966 |
+
"learning_rate": 1.7577197149643706e-07,
|
1967 |
+
"logits/chosen": -0.25,
|
1968 |
+
"logits/rejected": -0.482421875,
|
1969 |
+
"logps/chosen": -380.0,
|
1970 |
+
"logps/rejected": -370.0,
|
1971 |
+
"loss": 0.0145,
|
1972 |
+
"rewards/accuracies": 1.0,
|
1973 |
+
"rewards/chosen": 1.953125,
|
1974 |
+
"rewards/margins": 7.46875,
|
1975 |
+
"rewards/rejected": -5.53125,
|
1976 |
+
"step": 1280
|
1977 |
+
},
|
1978 |
+
{
|
1979 |
+
"epoch": 2.0673076923076925,
|
1980 |
+
"grad_norm": 4.904685892165479,
|
1981 |
+
"learning_rate": 1.728028503562945e-07,
|
1982 |
+
"logits/chosen": -0.22265625,
|
1983 |
+
"logits/rejected": -0.466796875,
|
1984 |
+
"logps/chosen": -376.0,
|
1985 |
+
"logps/rejected": -366.0,
|
1986 |
+
"loss": 0.0088,
|
1987 |
+
"rewards/accuracies": 1.0,
|
1988 |
+
"rewards/chosen": 2.515625,
|
1989 |
+
"rewards/margins": 7.3125,
|
1990 |
+
"rewards/rejected": -4.78125,
|
1991 |
+
"step": 1290
|
1992 |
+
},
|
1993 |
+
{
|
1994 |
+
"epoch": 2.0833333333333335,
|
1995 |
+
"grad_norm": 0.6187256370987634,
|
1996 |
+
"learning_rate": 1.6983372921615202e-07,
|
1997 |
+
"logits/chosen": -0.2138671875,
|
1998 |
+
"logits/rejected": -0.458984375,
|
1999 |
+
"logps/chosen": -362.0,
|
2000 |
+
"logps/rejected": -370.0,
|
2001 |
+
"loss": 0.0123,
|
2002 |
+
"rewards/accuracies": 1.0,
|
2003 |
+
"rewards/chosen": 2.265625,
|
2004 |
+
"rewards/margins": 7.625,
|
2005 |
+
"rewards/rejected": -5.375,
|
2006 |
+
"step": 1300
|
2007 |
+
},
|
2008 |
+
{
|
2009 |
+
"epoch": 2.0993589743589745,
|
2010 |
+
"grad_norm": 0.7331904803685036,
|
2011 |
+
"learning_rate": 1.668646080760095e-07,
|
2012 |
+
"logits/chosen": -0.15234375,
|
2013 |
+
"logits/rejected": -0.40625,
|
2014 |
+
"logps/chosen": -354.0,
|
2015 |
+
"logps/rejected": -356.0,
|
2016 |
+
"loss": 0.0061,
|
2017 |
+
"rewards/accuracies": 1.0,
|
2018 |
+
"rewards/chosen": 2.59375,
|
2019 |
+
"rewards/margins": 7.65625,
|
2020 |
+
"rewards/rejected": -5.0625,
|
2021 |
+
"step": 1310
|
2022 |
+
},
|
2023 |
+
{
|
2024 |
+
"epoch": 2.1153846153846154,
|
2025 |
+
"grad_norm": 2.0296944258203036,
|
2026 |
+
"learning_rate": 1.6389548693586697e-07,
|
2027 |
+
"logits/chosen": -0.2412109375,
|
2028 |
+
"logits/rejected": -0.1748046875,
|
2029 |
+
"logps/chosen": -348.0,
|
2030 |
+
"logps/rejected": -348.0,
|
2031 |
+
"loss": 0.0231,
|
2032 |
+
"rewards/accuracies": 0.987500011920929,
|
2033 |
+
"rewards/chosen": 2.484375,
|
2034 |
+
"rewards/margins": 7.125,
|
2035 |
+
"rewards/rejected": -4.625,
|
2036 |
+
"step": 1320
|
2037 |
+
},
|
2038 |
+
{
|
2039 |
+
"epoch": 2.1314102564102564,
|
2040 |
+
"grad_norm": 23.71632295557207,
|
2041 |
+
"learning_rate": 1.6092636579572448e-07,
|
2042 |
+
"logits/chosen": -0.2275390625,
|
2043 |
+
"logits/rejected": -0.3125,
|
2044 |
+
"logps/chosen": -358.0,
|
2045 |
+
"logps/rejected": -342.0,
|
2046 |
+
"loss": 0.019,
|
2047 |
+
"rewards/accuracies": 1.0,
|
2048 |
+
"rewards/chosen": 2.78125,
|
2049 |
+
"rewards/margins": 7.90625,
|
2050 |
+
"rewards/rejected": -5.125,
|
2051 |
+
"step": 1330
|
2052 |
+
},
|
2053 |
+
{
|
2054 |
+
"epoch": 2.1474358974358974,
|
2055 |
+
"grad_norm": 4.284093803737362,
|
2056 |
+
"learning_rate": 1.5795724465558193e-07,
|
2057 |
+
"logits/chosen": -0.2255859375,
|
2058 |
+
"logits/rejected": -0.5546875,
|
2059 |
+
"logps/chosen": -364.0,
|
2060 |
+
"logps/rejected": -384.0,
|
2061 |
+
"loss": 0.0159,
|
2062 |
+
"rewards/accuracies": 1.0,
|
2063 |
+
"rewards/chosen": 2.484375,
|
2064 |
+
"rewards/margins": 7.4375,
|
2065 |
+
"rewards/rejected": -4.9375,
|
2066 |
+
"step": 1340
|
2067 |
+
},
|
2068 |
+
{
|
2069 |
+
"epoch": 2.1634615384615383,
|
2070 |
+
"grad_norm": 0.7862983762455222,
|
2071 |
+
"learning_rate": 1.549881235154394e-07,
|
2072 |
+
"logits/chosen": -0.1435546875,
|
2073 |
+
"logits/rejected": -0.337890625,
|
2074 |
+
"logps/chosen": -376.0,
|
2075 |
+
"logps/rejected": -354.0,
|
2076 |
+
"loss": 0.0114,
|
2077 |
+
"rewards/accuracies": 1.0,
|
2078 |
+
"rewards/chosen": 2.828125,
|
2079 |
+
"rewards/margins": 7.40625,
|
2080 |
+
"rewards/rejected": -4.59375,
|
2081 |
+
"step": 1350
|
2082 |
+
},
|
2083 |
+
{
|
2084 |
+
"epoch": 2.1794871794871793,
|
2085 |
+
"grad_norm": 1.8750426567792726,
|
2086 |
+
"learning_rate": 1.520190023752969e-07,
|
2087 |
+
"logits/chosen": -0.259765625,
|
2088 |
+
"logits/rejected": -0.59765625,
|
2089 |
+
"logps/chosen": -364.0,
|
2090 |
+
"logps/rejected": -330.0,
|
2091 |
+
"loss": 0.0138,
|
2092 |
+
"rewards/accuracies": 1.0,
|
2093 |
+
"rewards/chosen": 3.015625,
|
2094 |
+
"rewards/margins": 8.1875,
|
2095 |
+
"rewards/rejected": -5.15625,
|
2096 |
+
"step": 1360
|
2097 |
+
},
|
2098 |
+
{
|
2099 |
+
"epoch": 2.1955128205128207,
|
2100 |
+
"grad_norm": 7.170950559094129,
|
2101 |
+
"learning_rate": 1.490498812351544e-07,
|
2102 |
+
"logits/chosen": -0.251953125,
|
2103 |
+
"logits/rejected": -0.36328125,
|
2104 |
+
"logps/chosen": -362.0,
|
2105 |
+
"logps/rejected": -356.0,
|
2106 |
+
"loss": 0.0323,
|
2107 |
+
"rewards/accuracies": 1.0,
|
2108 |
+
"rewards/chosen": 3.0,
|
2109 |
+
"rewards/margins": 8.1875,
|
2110 |
+
"rewards/rejected": -5.1875,
|
2111 |
+
"step": 1370
|
2112 |
+
},
|
2113 |
+
{
|
2114 |
+
"epoch": 2.2115384615384617,
|
2115 |
+
"grad_norm": 21.23713813309802,
|
2116 |
+
"learning_rate": 1.4608076009501184e-07,
|
2117 |
+
"logits/chosen": -0.2431640625,
|
2118 |
+
"logits/rejected": -0.2353515625,
|
2119 |
+
"logps/chosen": -366.0,
|
2120 |
+
"logps/rejected": -356.0,
|
2121 |
+
"loss": 0.024,
|
2122 |
+
"rewards/accuracies": 1.0,
|
2123 |
+
"rewards/chosen": 2.6875,
|
2124 |
+
"rewards/margins": 7.8125,
|
2125 |
+
"rewards/rejected": -5.125,
|
2126 |
+
"step": 1380
|
2127 |
+
},
|
2128 |
+
{
|
2129 |
+
"epoch": 2.2275641025641026,
|
2130 |
+
"grad_norm": 7.169471258104354,
|
2131 |
+
"learning_rate": 1.4311163895486935e-07,
|
2132 |
+
"logits/chosen": -0.18359375,
|
2133 |
+
"logits/rejected": -0.29296875,
|
2134 |
+
"logps/chosen": -348.0,
|
2135 |
+
"logps/rejected": -364.0,
|
2136 |
+
"loss": 0.0257,
|
2137 |
+
"rewards/accuracies": 1.0,
|
2138 |
+
"rewards/chosen": 2.90625,
|
2139 |
+
"rewards/margins": 8.1875,
|
2140 |
+
"rewards/rejected": -5.25,
|
2141 |
+
"step": 1390
|
2142 |
+
},
|
2143 |
+
{
|
2144 |
+
"epoch": 2.2435897435897436,
|
2145 |
+
"grad_norm": 4.891546899421327,
|
2146 |
+
"learning_rate": 1.4014251781472683e-07,
|
2147 |
+
"logits/chosen": -0.22265625,
|
2148 |
+
"logits/rejected": -0.55078125,
|
2149 |
+
"logps/chosen": -344.0,
|
2150 |
+
"logps/rejected": -374.0,
|
2151 |
+
"loss": 0.0165,
|
2152 |
+
"rewards/accuracies": 0.987500011920929,
|
2153 |
+
"rewards/chosen": 2.71875,
|
2154 |
+
"rewards/margins": 7.4375,
|
2155 |
+
"rewards/rejected": -4.71875,
|
2156 |
+
"step": 1400
|
2157 |
+
},
|
2158 |
+
{
|
2159 |
+
"epoch": 2.2596153846153846,
|
2160 |
+
"grad_norm": 14.0088561337579,
|
2161 |
+
"learning_rate": 1.3717339667458433e-07,
|
2162 |
+
"logits/chosen": -0.171875,
|
2163 |
+
"logits/rejected": -0.375,
|
2164 |
+
"logps/chosen": -336.0,
|
2165 |
+
"logps/rejected": -380.0,
|
2166 |
+
"loss": 0.023,
|
2167 |
+
"rewards/accuracies": 1.0,
|
2168 |
+
"rewards/chosen": 2.328125,
|
2169 |
+
"rewards/margins": 8.125,
|
2170 |
+
"rewards/rejected": -5.8125,
|
2171 |
+
"step": 1410
|
2172 |
+
},
|
2173 |
+
{
|
2174 |
+
"epoch": 2.2756410256410255,
|
2175 |
+
"grad_norm": 5.478498841262275,
|
2176 |
+
"learning_rate": 1.342042755344418e-07,
|
2177 |
+
"logits/chosen": -0.244140625,
|
2178 |
+
"logits/rejected": -0.60546875,
|
2179 |
+
"logps/chosen": -340.0,
|
2180 |
+
"logps/rejected": -368.0,
|
2181 |
+
"loss": 0.0101,
|
2182 |
+
"rewards/accuracies": 1.0,
|
2183 |
+
"rewards/chosen": 2.078125,
|
2184 |
+
"rewards/margins": 7.125,
|
2185 |
+
"rewards/rejected": -5.0625,
|
2186 |
+
"step": 1420
|
2187 |
+
},
|
2188 |
+
{
|
2189 |
+
"epoch": 2.2916666666666665,
|
2190 |
+
"grad_norm": 4.08870815918569,
|
2191 |
+
"learning_rate": 1.3123515439429926e-07,
|
2192 |
+
"logits/chosen": -0.216796875,
|
2193 |
+
"logits/rejected": -0.5,
|
2194 |
+
"logps/chosen": -348.0,
|
2195 |
+
"logps/rejected": -380.0,
|
2196 |
+
"loss": 0.0094,
|
2197 |
+
"rewards/accuracies": 1.0,
|
2198 |
+
"rewards/chosen": 2.40625,
|
2199 |
+
"rewards/margins": 7.875,
|
2200 |
+
"rewards/rejected": -5.46875,
|
2201 |
+
"step": 1430
|
2202 |
+
},
|
2203 |
+
{
|
2204 |
+
"epoch": 2.3076923076923075,
|
2205 |
+
"grad_norm": 4.0828939935609885,
|
2206 |
+
"learning_rate": 1.2826603325415677e-07,
|
2207 |
+
"logits/chosen": -0.310546875,
|
2208 |
+
"logits/rejected": -0.3515625,
|
2209 |
+
"logps/chosen": -362.0,
|
2210 |
+
"logps/rejected": -372.0,
|
2211 |
+
"loss": 0.0115,
|
2212 |
+
"rewards/accuracies": 1.0,
|
2213 |
+
"rewards/chosen": 1.9296875,
|
2214 |
+
"rewards/margins": 7.90625,
|
2215 |
+
"rewards/rejected": -5.96875,
|
2216 |
+
"step": 1440
|
2217 |
+
},
|
2218 |
+
{
|
2219 |
+
"epoch": 2.323717948717949,
|
2220 |
+
"grad_norm": 2.2611774310220714,
|
2221 |
+
"learning_rate": 1.2529691211401425e-07,
|
2222 |
+
"logits/chosen": -0.2890625,
|
2223 |
+
"logits/rejected": -0.3671875,
|
2224 |
+
"logps/chosen": -350.0,
|
2225 |
+
"logps/rejected": -380.0,
|
2226 |
+
"loss": 0.0071,
|
2227 |
+
"rewards/accuracies": 1.0,
|
2228 |
+
"rewards/chosen": 2.359375,
|
2229 |
+
"rewards/margins": 8.1875,
|
2230 |
+
"rewards/rejected": -5.84375,
|
2231 |
+
"step": 1450
|
2232 |
+
},
|
2233 |
+
{
|
2234 |
+
"epoch": 2.33974358974359,
|
2235 |
+
"grad_norm": 35.51723177781465,
|
2236 |
+
"learning_rate": 1.2232779097387173e-07,
|
2237 |
+
"logits/chosen": -0.244140625,
|
2238 |
+
"logits/rejected": -0.36328125,
|
2239 |
+
"logps/chosen": -378.0,
|
2240 |
+
"logps/rejected": -368.0,
|
2241 |
+
"loss": 0.0773,
|
2242 |
+
"rewards/accuracies": 1.0,
|
2243 |
+
"rewards/chosen": 2.078125,
|
2244 |
+
"rewards/margins": 8.125,
|
2245 |
+
"rewards/rejected": -6.0,
|
2246 |
+
"step": 1460
|
2247 |
+
},
|
2248 |
+
{
|
2249 |
+
"epoch": 2.355769230769231,
|
2250 |
+
"grad_norm": 5.534009913713299,
|
2251 |
+
"learning_rate": 1.193586698337292e-07,
|
2252 |
+
"logits/chosen": -0.1767578125,
|
2253 |
+
"logits/rejected": -0.5390625,
|
2254 |
+
"logps/chosen": -368.0,
|
2255 |
+
"logps/rejected": -358.0,
|
2256 |
+
"loss": 0.012,
|
2257 |
+
"rewards/accuracies": 1.0,
|
2258 |
+
"rewards/chosen": 2.484375,
|
2259 |
+
"rewards/margins": 8.0625,
|
2260 |
+
"rewards/rejected": -5.59375,
|
2261 |
+
"step": 1470
|
2262 |
+
},
|
2263 |
+
{
|
2264 |
+
"epoch": 2.371794871794872,
|
2265 |
+
"grad_norm": 18.31189968205467,
|
2266 |
+
"learning_rate": 1.163895486935867e-07,
|
2267 |
+
"logits/chosen": -0.25390625,
|
2268 |
+
"logits/rejected": -0.353515625,
|
2269 |
+
"logps/chosen": -368.0,
|
2270 |
+
"logps/rejected": -356.0,
|
2271 |
+
"loss": 0.0359,
|
2272 |
+
"rewards/accuracies": 0.9750000238418579,
|
2273 |
+
"rewards/chosen": 2.3125,
|
2274 |
+
"rewards/margins": 7.78125,
|
2275 |
+
"rewards/rejected": -5.46875,
|
2276 |
+
"step": 1480
|
2277 |
+
},
|
2278 |
+
{
|
2279 |
+
"epoch": 2.3878205128205128,
|
2280 |
+
"grad_norm": 14.190561755907082,
|
2281 |
+
"learning_rate": 1.1342042755344417e-07,
|
2282 |
+
"logits/chosen": -0.1484375,
|
2283 |
+
"logits/rejected": -0.35546875,
|
2284 |
+
"logps/chosen": -376.0,
|
2285 |
+
"logps/rejected": -370.0,
|
2286 |
+
"loss": 0.0065,
|
2287 |
+
"rewards/accuracies": 1.0,
|
2288 |
+
"rewards/chosen": 2.734375,
|
2289 |
+
"rewards/margins": 8.4375,
|
2290 |
+
"rewards/rejected": -5.6875,
|
2291 |
+
"step": 1490
|
2292 |
+
},
|
2293 |
+
{
|
2294 |
+
"epoch": 2.4038461538461537,
|
2295 |
+
"grad_norm": 57.38633989383038,
|
2296 |
+
"learning_rate": 1.1045130641330165e-07,
|
2297 |
+
"logits/chosen": -0.2177734375,
|
2298 |
+
"logits/rejected": -0.24609375,
|
2299 |
+
"logps/chosen": -356.0,
|
2300 |
+
"logps/rejected": -376.0,
|
2301 |
+
"loss": 0.0762,
|
2302 |
+
"rewards/accuracies": 0.949999988079071,
|
2303 |
+
"rewards/chosen": 1.25,
|
2304 |
+
"rewards/margins": 7.25,
|
2305 |
+
"rewards/rejected": -6.0,
|
2306 |
+
"step": 1500
|
2307 |
+
},
|
2308 |
+
{
|
2309 |
+
"epoch": 2.4198717948717947,
|
2310 |
+
"grad_norm": 13.945337786807688,
|
2311 |
+
"learning_rate": 1.0748218527315913e-07,
|
2312 |
+
"logits/chosen": -0.2353515625,
|
2313 |
+
"logits/rejected": -0.486328125,
|
2314 |
+
"logps/chosen": -374.0,
|
2315 |
+
"logps/rejected": -386.0,
|
2316 |
+
"loss": 0.0146,
|
2317 |
+
"rewards/accuracies": 1.0,
|
2318 |
+
"rewards/chosen": 2.28125,
|
2319 |
+
"rewards/margins": 8.625,
|
2320 |
+
"rewards/rejected": -6.34375,
|
2321 |
+
"step": 1510
|
2322 |
+
},
|
2323 |
+
{
|
2324 |
+
"epoch": 2.435897435897436,
|
2325 |
+
"grad_norm": 2.025562206086781,
|
2326 |
+
"learning_rate": 1.0451306413301662e-07,
|
2327 |
+
"logits/chosen": -0.2890625,
|
2328 |
+
"logits/rejected": -0.404296875,
|
2329 |
+
"logps/chosen": -362.0,
|
2330 |
+
"logps/rejected": -360.0,
|
2331 |
+
"loss": 0.0058,
|
2332 |
+
"rewards/accuracies": 1.0,
|
2333 |
+
"rewards/chosen": 2.96875,
|
2334 |
+
"rewards/margins": 8.875,
|
2335 |
+
"rewards/rejected": -5.90625,
|
2336 |
+
"step": 1520
|
2337 |
+
},
|
2338 |
+
{
|
2339 |
+
"epoch": 2.451923076923077,
|
2340 |
+
"grad_norm": 2.5818458781278997,
|
2341 |
+
"learning_rate": 1.0154394299287411e-07,
|
2342 |
+
"logits/chosen": -0.20703125,
|
2343 |
+
"logits/rejected": -0.380859375,
|
2344 |
+
"logps/chosen": -356.0,
|
2345 |
+
"logps/rejected": -378.0,
|
2346 |
+
"loss": 0.0084,
|
2347 |
+
"rewards/accuracies": 1.0,
|
2348 |
+
"rewards/chosen": 2.75,
|
2349 |
+
"rewards/margins": 8.375,
|
2350 |
+
"rewards/rejected": -5.59375,
|
2351 |
+
"step": 1530
|
2352 |
+
},
|
2353 |
+
{
|
2354 |
+
"epoch": 2.467948717948718,
|
2355 |
+
"grad_norm": 0.5147935469188503,
|
2356 |
+
"learning_rate": 9.857482185273158e-08,
|
2357 |
+
"logits/chosen": -0.240234375,
|
2358 |
+
"logits/rejected": -0.47265625,
|
2359 |
+
"logps/chosen": -348.0,
|
2360 |
+
"logps/rejected": -354.0,
|
2361 |
+
"loss": 0.0156,
|
2362 |
+
"rewards/accuracies": 1.0,
|
2363 |
+
"rewards/chosen": 2.609375,
|
2364 |
+
"rewards/margins": 8.125,
|
2365 |
+
"rewards/rejected": -5.46875,
|
2366 |
+
"step": 1540
|
2367 |
+
},
|
2368 |
+
{
|
2369 |
+
"epoch": 2.483974358974359,
|
2370 |
+
"grad_norm": 2.5527476910216764,
|
2371 |
+
"learning_rate": 9.560570071258907e-08,
|
2372 |
+
"logits/chosen": -0.1748046875,
|
2373 |
+
"logits/rejected": -0.392578125,
|
2374 |
+
"logps/chosen": -378.0,
|
2375 |
+
"logps/rejected": -336.0,
|
2376 |
+
"loss": 0.0132,
|
2377 |
+
"rewards/accuracies": 1.0,
|
2378 |
+
"rewards/chosen": 2.34375,
|
2379 |
+
"rewards/margins": 8.125,
|
2380 |
+
"rewards/rejected": -5.8125,
|
2381 |
+
"step": 1550
|
2382 |
+
},
|
2383 |
+
{
|
2384 |
+
"epoch": 2.5,
|
2385 |
+
"grad_norm": 2.6084669807533807,
|
2386 |
+
"learning_rate": 9.263657957244655e-08,
|
2387 |
+
"logits/chosen": -0.28125,
|
2388 |
+
"logits/rejected": -0.40234375,
|
2389 |
+
"logps/chosen": -340.0,
|
2390 |
+
"logps/rejected": -386.0,
|
2391 |
+
"loss": 0.0112,
|
2392 |
+
"rewards/accuracies": 0.987500011920929,
|
2393 |
+
"rewards/chosen": 2.78125,
|
2394 |
+
"rewards/margins": 8.4375,
|
2395 |
+
"rewards/rejected": -5.6875,
|
2396 |
+
"step": 1560
|
2397 |
+
},
|
2398 |
+
{
|
2399 |
+
"epoch": 2.516025641025641,
|
2400 |
+
"grad_norm": 0.5142214115866551,
|
2401 |
+
"learning_rate": 8.966745843230403e-08,
|
2402 |
+
"logits/chosen": -0.26953125,
|
2403 |
+
"logits/rejected": -0.34765625,
|
2404 |
+
"logps/chosen": -332.0,
|
2405 |
+
"logps/rejected": -338.0,
|
2406 |
+
"loss": 0.0087,
|
2407 |
+
"rewards/accuracies": 1.0,
|
2408 |
+
"rewards/chosen": 2.625,
|
2409 |
+
"rewards/margins": 8.25,
|
2410 |
+
"rewards/rejected": -5.625,
|
2411 |
+
"step": 1570
|
2412 |
+
},
|
2413 |
+
{
|
2414 |
+
"epoch": 2.532051282051282,
|
2415 |
+
"grad_norm": 0.4686133876015494,
|
2416 |
+
"learning_rate": 8.669833729216151e-08,
|
2417 |
+
"logits/chosen": -0.2080078125,
|
2418 |
+
"logits/rejected": -0.36328125,
|
2419 |
+
"logps/chosen": -374.0,
|
2420 |
+
"logps/rejected": -382.0,
|
2421 |
+
"loss": 0.0066,
|
2422 |
+
"rewards/accuracies": 1.0,
|
2423 |
+
"rewards/chosen": 2.0625,
|
2424 |
+
"rewards/margins": 7.84375,
|
2425 |
+
"rewards/rejected": -5.78125,
|
2426 |
+
"step": 1580
|
2427 |
+
},
|
2428 |
+
{
|
2429 |
+
"epoch": 2.5480769230769234,
|
2430 |
+
"grad_norm": 4.377458752115824,
|
2431 |
+
"learning_rate": 8.3729216152019e-08,
|
2432 |
+
"logits/chosen": -0.240234375,
|
2433 |
+
"logits/rejected": -0.50390625,
|
2434 |
+
"logps/chosen": -358.0,
|
2435 |
+
"logps/rejected": -370.0,
|
2436 |
+
"loss": 0.0077,
|
2437 |
+
"rewards/accuracies": 1.0,
|
2438 |
+
"rewards/chosen": 2.375,
|
2439 |
+
"rewards/margins": 8.4375,
|
2440 |
+
"rewards/rejected": -6.03125,
|
2441 |
+
"step": 1590
|
2442 |
+
},
|
2443 |
+
{
|
2444 |
+
"epoch": 2.564102564102564,
|
2445 |
+
"grad_norm": 8.575422939319253,
|
2446 |
+
"learning_rate": 8.076009501187649e-08,
|
2447 |
+
"logits/chosen": -0.2490234375,
|
2448 |
+
"logits/rejected": -0.337890625,
|
2449 |
+
"logps/chosen": -348.0,
|
2450 |
+
"logps/rejected": -370.0,
|
2451 |
+
"loss": 0.0241,
|
2452 |
+
"rewards/accuracies": 0.987500011920929,
|
2453 |
+
"rewards/chosen": 2.484375,
|
2454 |
+
"rewards/margins": 7.5,
|
2455 |
+
"rewards/rejected": -5.0,
|
2456 |
+
"step": 1600
|
2457 |
+
},
|
2458 |
+
{
|
2459 |
+
"epoch": 2.5801282051282053,
|
2460 |
+
"grad_norm": 2.551052943978534,
|
2461 |
+
"learning_rate": 7.779097387173396e-08,
|
2462 |
+
"logits/chosen": -0.2734375,
|
2463 |
+
"logits/rejected": -0.375,
|
2464 |
+
"logps/chosen": -342.0,
|
2465 |
+
"logps/rejected": -360.0,
|
2466 |
+
"loss": 0.0082,
|
2467 |
+
"rewards/accuracies": 1.0,
|
2468 |
+
"rewards/chosen": 2.8125,
|
2469 |
+
"rewards/margins": 7.8125,
|
2470 |
+
"rewards/rejected": -4.96875,
|
2471 |
+
"step": 1610
|
2472 |
+
},
|
2473 |
+
{
|
2474 |
+
"epoch": 2.5961538461538463,
|
2475 |
+
"grad_norm": 6.000172766868698,
|
2476 |
+
"learning_rate": 7.482185273159145e-08,
|
2477 |
+
"logits/chosen": -0.263671875,
|
2478 |
+
"logits/rejected": -0.451171875,
|
2479 |
+
"logps/chosen": -368.0,
|
2480 |
+
"logps/rejected": -378.0,
|
2481 |
+
"loss": 0.0303,
|
2482 |
+
"rewards/accuracies": 1.0,
|
2483 |
+
"rewards/chosen": 2.46875,
|
2484 |
+
"rewards/margins": 7.96875,
|
2485 |
+
"rewards/rejected": -5.5,
|
2486 |
+
"step": 1620
|
2487 |
+
},
|
2488 |
+
{
|
2489 |
+
"epoch": 2.6121794871794872,
|
2490 |
+
"grad_norm": 19.722033841376266,
|
2491 |
+
"learning_rate": 7.185273159144893e-08,
|
2492 |
+
"logits/chosen": -0.294921875,
|
2493 |
+
"logits/rejected": -0.392578125,
|
2494 |
+
"logps/chosen": -366.0,
|
2495 |
+
"logps/rejected": -398.0,
|
2496 |
+
"loss": 0.0123,
|
2497 |
+
"rewards/accuracies": 1.0,
|
2498 |
+
"rewards/chosen": 2.71875,
|
2499 |
+
"rewards/margins": 8.25,
|
2500 |
+
"rewards/rejected": -5.53125,
|
2501 |
+
"step": 1630
|
2502 |
+
},
|
2503 |
+
{
|
2504 |
+
"epoch": 2.628205128205128,
|
2505 |
+
"grad_norm": 11.30540085696599,
|
2506 |
+
"learning_rate": 6.88836104513064e-08,
|
2507 |
+
"logits/chosen": -0.18359375,
|
2508 |
+
"logits/rejected": -0.314453125,
|
2509 |
+
"logps/chosen": -380.0,
|
2510 |
+
"logps/rejected": -354.0,
|
2511 |
+
"loss": 0.0078,
|
2512 |
+
"rewards/accuracies": 1.0,
|
2513 |
+
"rewards/chosen": 2.734375,
|
2514 |
+
"rewards/margins": 8.6875,
|
2515 |
+
"rewards/rejected": -5.96875,
|
2516 |
+
"step": 1640
|
2517 |
+
},
|
2518 |
+
{
|
2519 |
+
"epoch": 2.644230769230769,
|
2520 |
+
"grad_norm": 1.1773482574998353,
|
2521 |
+
"learning_rate": 6.591448931116388e-08,
|
2522 |
+
"logits/chosen": -0.162109375,
|
2523 |
+
"logits/rejected": -0.4609375,
|
2524 |
+
"logps/chosen": -378.0,
|
2525 |
+
"logps/rejected": -374.0,
|
2526 |
+
"loss": 0.0175,
|
2527 |
+
"rewards/accuracies": 0.9750000238418579,
|
2528 |
+
"rewards/chosen": 2.125,
|
2529 |
+
"rewards/margins": 8.0625,
|
2530 |
+
"rewards/rejected": -5.9375,
|
2531 |
+
"step": 1650
|
2532 |
+
},
|
2533 |
+
{
|
2534 |
+
"epoch": 2.66025641025641,
|
2535 |
+
"grad_norm": 52.67450880620409,
|
2536 |
+
"learning_rate": 6.294536817102138e-08,
|
2537 |
+
"logits/chosen": -0.294921875,
|
2538 |
+
"logits/rejected": -0.58203125,
|
2539 |
+
"logps/chosen": -386.0,
|
2540 |
+
"logps/rejected": -376.0,
|
2541 |
+
"loss": 0.0316,
|
2542 |
+
"rewards/accuracies": 0.987500011920929,
|
2543 |
+
"rewards/chosen": 1.828125,
|
2544 |
+
"rewards/margins": 7.53125,
|
2545 |
+
"rewards/rejected": -5.6875,
|
2546 |
+
"step": 1660
|
2547 |
+
},
|
2548 |
+
{
|
2549 |
+
"epoch": 2.676282051282051,
|
2550 |
+
"grad_norm": 2.444649208747581,
|
2551 |
+
"learning_rate": 5.997624703087885e-08,
|
2552 |
+
"logits/chosen": -0.2734375,
|
2553 |
+
"logits/rejected": -0.30859375,
|
2554 |
+
"logps/chosen": -348.0,
|
2555 |
+
"logps/rejected": -330.0,
|
2556 |
+
"loss": 0.0139,
|
2557 |
+
"rewards/accuracies": 1.0,
|
2558 |
+
"rewards/chosen": 2.34375,
|
2559 |
+
"rewards/margins": 7.96875,
|
2560 |
+
"rewards/rejected": -5.625,
|
2561 |
+
"step": 1670
|
2562 |
+
},
|
2563 |
+
{
|
2564 |
+
"epoch": 2.6923076923076925,
|
2565 |
+
"grad_norm": 1.8309710467577502,
|
2566 |
+
"learning_rate": 5.700712589073634e-08,
|
2567 |
+
"logits/chosen": -0.33203125,
|
2568 |
+
"logits/rejected": -0.5546875,
|
2569 |
+
"logps/chosen": -368.0,
|
2570 |
+
"logps/rejected": -372.0,
|
2571 |
+
"loss": 0.0044,
|
2572 |
+
"rewards/accuracies": 1.0,
|
2573 |
+
"rewards/chosen": 2.578125,
|
2574 |
+
"rewards/margins": 8.3125,
|
2575 |
+
"rewards/rejected": -5.75,
|
2576 |
+
"step": 1680
|
2577 |
+
},
|
2578 |
+
{
|
2579 |
+
"epoch": 2.7083333333333335,
|
2580 |
+
"grad_norm": 0.974183927512936,
|
2581 |
+
"learning_rate": 5.4038004750593824e-08,
|
2582 |
+
"logits/chosen": -0.212890625,
|
2583 |
+
"logits/rejected": -0.384765625,
|
2584 |
+
"logps/chosen": -370.0,
|
2585 |
+
"logps/rejected": -366.0,
|
2586 |
+
"loss": 0.0061,
|
2587 |
+
"rewards/accuracies": 1.0,
|
2588 |
+
"rewards/chosen": 2.4375,
|
2589 |
+
"rewards/margins": 8.625,
|
2590 |
+
"rewards/rejected": -6.21875,
|
2591 |
+
"step": 1690
|
2592 |
+
},
|
2593 |
+
{
|
2594 |
+
"epoch": 2.7243589743589745,
|
2595 |
+
"grad_norm": 8.112271810670284,
|
2596 |
+
"learning_rate": 5.10688836104513e-08,
|
2597 |
+
"logits/chosen": -0.287109375,
|
2598 |
+
"logits/rejected": -0.34375,
|
2599 |
+
"logps/chosen": -364.0,
|
2600 |
+
"logps/rejected": -384.0,
|
2601 |
+
"loss": 0.0074,
|
2602 |
+
"rewards/accuracies": 1.0,
|
2603 |
+
"rewards/chosen": 2.53125,
|
2604 |
+
"rewards/margins": 8.625,
|
2605 |
+
"rewards/rejected": -6.09375,
|
2606 |
+
"step": 1700
|
2607 |
+
},
|
2608 |
+
{
|
2609 |
+
"epoch": 2.7403846153846154,
|
2610 |
+
"grad_norm": 48.323303487774965,
|
2611 |
+
"learning_rate": 4.809976247030879e-08,
|
2612 |
+
"logits/chosen": -0.2734375,
|
2613 |
+
"logits/rejected": -0.39453125,
|
2614 |
+
"logps/chosen": -378.0,
|
2615 |
+
"logps/rejected": -368.0,
|
2616 |
+
"loss": 0.0187,
|
2617 |
+
"rewards/accuracies": 1.0,
|
2618 |
+
"rewards/chosen": 2.28125,
|
2619 |
+
"rewards/margins": 7.9375,
|
2620 |
+
"rewards/rejected": -5.65625,
|
2621 |
+
"step": 1710
|
2622 |
+
},
|
2623 |
+
{
|
2624 |
+
"epoch": 2.7564102564102564,
|
2625 |
+
"grad_norm": 0.6216279700601488,
|
2626 |
+
"learning_rate": 4.5130641330166267e-08,
|
2627 |
+
"logits/chosen": -0.2470703125,
|
2628 |
+
"logits/rejected": -0.3828125,
|
2629 |
+
"logps/chosen": -340.0,
|
2630 |
+
"logps/rejected": -372.0,
|
2631 |
+
"loss": 0.0305,
|
2632 |
+
"rewards/accuracies": 0.987500011920929,
|
2633 |
+
"rewards/chosen": 2.25,
|
2634 |
+
"rewards/margins": 7.6875,
|
2635 |
+
"rewards/rejected": -5.4375,
|
2636 |
+
"step": 1720
|
2637 |
+
},
|
2638 |
+
{
|
2639 |
+
"epoch": 2.7724358974358974,
|
2640 |
+
"grad_norm": 8.386988307728698,
|
2641 |
+
"learning_rate": 4.216152019002375e-08,
|
2642 |
+
"logits/chosen": -0.2060546875,
|
2643 |
+
"logits/rejected": -0.349609375,
|
2644 |
+
"logps/chosen": -360.0,
|
2645 |
+
"logps/rejected": -364.0,
|
2646 |
+
"loss": 0.0062,
|
2647 |
+
"rewards/accuracies": 1.0,
|
2648 |
+
"rewards/chosen": 2.734375,
|
2649 |
+
"rewards/margins": 8.4375,
|
2650 |
+
"rewards/rejected": -5.6875,
|
2651 |
+
"step": 1730
|
2652 |
+
},
|
2653 |
+
{
|
2654 |
+
"epoch": 2.7884615384615383,
|
2655 |
+
"grad_norm": 2.6310329285858587,
|
2656 |
+
"learning_rate": 3.919239904988123e-08,
|
2657 |
+
"logits/chosen": -0.1826171875,
|
2658 |
+
"logits/rejected": -0.333984375,
|
2659 |
+
"logps/chosen": -358.0,
|
2660 |
+
"logps/rejected": -390.0,
|
2661 |
+
"loss": 0.0088,
|
2662 |
+
"rewards/accuracies": 1.0,
|
2663 |
+
"rewards/chosen": 2.28125,
|
2664 |
+
"rewards/margins": 7.875,
|
2665 |
+
"rewards/rejected": -5.59375,
|
2666 |
+
"step": 1740
|
2667 |
+
},
|
2668 |
+
{
|
2669 |
+
"epoch": 2.8044871794871797,
|
2670 |
+
"grad_norm": 3.066349322026837,
|
2671 |
+
"learning_rate": 3.6223277909738715e-08,
|
2672 |
+
"logits/chosen": -0.2236328125,
|
2673 |
+
"logits/rejected": -0.357421875,
|
2674 |
+
"logps/chosen": -350.0,
|
2675 |
+
"logps/rejected": -368.0,
|
2676 |
+
"loss": 0.0092,
|
2677 |
+
"rewards/accuracies": 1.0,
|
2678 |
+
"rewards/chosen": 2.40625,
|
2679 |
+
"rewards/margins": 7.96875,
|
2680 |
+
"rewards/rejected": -5.5625,
|
2681 |
+
"step": 1750
|
2682 |
+
},
|
2683 |
+
{
|
2684 |
+
"epoch": 2.8205128205128203,
|
2685 |
+
"grad_norm": 8.079242737705172,
|
2686 |
+
"learning_rate": 3.32541567695962e-08,
|
2687 |
+
"logits/chosen": -0.0966796875,
|
2688 |
+
"logits/rejected": -0.2431640625,
|
2689 |
+
"logps/chosen": -342.0,
|
2690 |
+
"logps/rejected": -360.0,
|
2691 |
+
"loss": 0.0237,
|
2692 |
+
"rewards/accuracies": 1.0,
|
2693 |
+
"rewards/chosen": 2.421875,
|
2694 |
+
"rewards/margins": 8.4375,
|
2695 |
+
"rewards/rejected": -6.03125,
|
2696 |
+
"step": 1760
|
2697 |
+
},
|
2698 |
+
{
|
2699 |
+
"epoch": 2.8365384615384617,
|
2700 |
+
"grad_norm": 36.85905904660706,
|
2701 |
+
"learning_rate": 3.028503562945368e-08,
|
2702 |
+
"logits/chosen": -0.1962890625,
|
2703 |
+
"logits/rejected": -0.1435546875,
|
2704 |
+
"logps/chosen": -376.0,
|
2705 |
+
"logps/rejected": -366.0,
|
2706 |
+
"loss": 0.0138,
|
2707 |
+
"rewards/accuracies": 1.0,
|
2708 |
+
"rewards/chosen": 3.015625,
|
2709 |
+
"rewards/margins": 8.75,
|
2710 |
+
"rewards/rejected": -5.71875,
|
2711 |
+
"step": 1770
|
2712 |
+
},
|
2713 |
+
{
|
2714 |
+
"epoch": 2.8525641025641026,
|
2715 |
+
"grad_norm": 1.3700769518291278,
|
2716 |
+
"learning_rate": 2.7315914489311164e-08,
|
2717 |
+
"logits/chosen": -0.177734375,
|
2718 |
+
"logits/rejected": -0.29296875,
|
2719 |
+
"logps/chosen": -354.0,
|
2720 |
+
"logps/rejected": -366.0,
|
2721 |
+
"loss": 0.0047,
|
2722 |
+
"rewards/accuracies": 1.0,
|
2723 |
+
"rewards/chosen": 2.1875,
|
2724 |
+
"rewards/margins": 8.375,
|
2725 |
+
"rewards/rejected": -6.15625,
|
2726 |
+
"step": 1780
|
2727 |
+
},
|
2728 |
+
{
|
2729 |
+
"epoch": 2.8685897435897436,
|
2730 |
+
"grad_norm": 2.5772413830882437,
|
2731 |
+
"learning_rate": 2.4346793349168646e-08,
|
2732 |
+
"logits/chosen": -0.3515625,
|
2733 |
+
"logits/rejected": -0.40625,
|
2734 |
+
"logps/chosen": -356.0,
|
2735 |
+
"logps/rejected": -380.0,
|
2736 |
+
"loss": 0.0071,
|
2737 |
+
"rewards/accuracies": 1.0,
|
2738 |
+
"rewards/chosen": 2.265625,
|
2739 |
+
"rewards/margins": 8.25,
|
2740 |
+
"rewards/rejected": -5.96875,
|
2741 |
+
"step": 1790
|
2742 |
+
},
|
2743 |
+
{
|
2744 |
+
"epoch": 2.8846153846153846,
|
2745 |
+
"grad_norm": 0.2522421688634616,
|
2746 |
+
"learning_rate": 2.1377672209026125e-08,
|
2747 |
+
"logits/chosen": -0.1884765625,
|
2748 |
+
"logits/rejected": -0.296875,
|
2749 |
+
"logps/chosen": -354.0,
|
2750 |
+
"logps/rejected": -362.0,
|
2751 |
+
"loss": 0.0155,
|
2752 |
+
"rewards/accuracies": 0.987500011920929,
|
2753 |
+
"rewards/chosen": 2.609375,
|
2754 |
+
"rewards/margins": 8.625,
|
2755 |
+
"rewards/rejected": -5.96875,
|
2756 |
+
"step": 1800
|
2757 |
+
},
|
2758 |
+
{
|
2759 |
+
"epoch": 2.9006410256410255,
|
2760 |
+
"grad_norm": 2.0378946111088347,
|
2761 |
+
"learning_rate": 1.840855106888361e-08,
|
2762 |
+
"logits/chosen": -0.298828125,
|
2763 |
+
"logits/rejected": -0.4765625,
|
2764 |
+
"logps/chosen": -346.0,
|
2765 |
+
"logps/rejected": -370.0,
|
2766 |
+
"loss": 0.0427,
|
2767 |
+
"rewards/accuracies": 0.9750000238418579,
|
2768 |
+
"rewards/chosen": 1.828125,
|
2769 |
+
"rewards/margins": 7.90625,
|
2770 |
+
"rewards/rejected": -6.0625,
|
2771 |
+
"step": 1810
|
2772 |
+
},
|
2773 |
+
{
|
2774 |
+
"epoch": 2.9166666666666665,
|
2775 |
+
"grad_norm": 2.2386092025677784,
|
2776 |
+
"learning_rate": 1.5439429928741092e-08,
|
2777 |
+
"logits/chosen": -0.21875,
|
2778 |
+
"logits/rejected": -0.2138671875,
|
2779 |
+
"logps/chosen": -380.0,
|
2780 |
+
"logps/rejected": -372.0,
|
2781 |
+
"loss": 0.0179,
|
2782 |
+
"rewards/accuracies": 0.987500011920929,
|
2783 |
+
"rewards/chosen": 1.84375,
|
2784 |
+
"rewards/margins": 8.5,
|
2785 |
+
"rewards/rejected": -6.65625,
|
2786 |
+
"step": 1820
|
2787 |
+
},
|
2788 |
+
{
|
2789 |
+
"epoch": 2.9326923076923075,
|
2790 |
+
"grad_norm": 5.606669828245322,
|
2791 |
+
"learning_rate": 1.2470308788598574e-08,
|
2792 |
+
"logits/chosen": -0.26953125,
|
2793 |
+
"logits/rejected": -0.224609375,
|
2794 |
+
"logps/chosen": -364.0,
|
2795 |
+
"logps/rejected": -346.0,
|
2796 |
+
"loss": 0.0302,
|
2797 |
+
"rewards/accuracies": 1.0,
|
2798 |
+
"rewards/chosen": 2.5625,
|
2799 |
+
"rewards/margins": 8.5625,
|
2800 |
+
"rewards/rejected": -5.96875,
|
2801 |
+
"step": 1830
|
2802 |
+
},
|
2803 |
+
{
|
2804 |
+
"epoch": 2.948717948717949,
|
2805 |
+
"grad_norm": 7.459897331240726,
|
2806 |
+
"learning_rate": 9.501187648456057e-09,
|
2807 |
+
"logits/chosen": -0.166015625,
|
2808 |
+
"logits/rejected": -0.27734375,
|
2809 |
+
"logps/chosen": -338.0,
|
2810 |
+
"logps/rejected": -372.0,
|
2811 |
+
"loss": 0.0069,
|
2812 |
+
"rewards/accuracies": 1.0,
|
2813 |
+
"rewards/chosen": 1.9453125,
|
2814 |
+
"rewards/margins": 8.375,
|
2815 |
+
"rewards/rejected": -6.40625,
|
2816 |
+
"step": 1840
|
2817 |
+
},
|
2818 |
+
{
|
2819 |
+
"epoch": 2.96474358974359,
|
2820 |
+
"grad_norm": 46.309709716339626,
|
2821 |
+
"learning_rate": 6.532066508313539e-09,
|
2822 |
+
"logits/chosen": -0.265625,
|
2823 |
+
"logits/rejected": -0.3046875,
|
2824 |
+
"logps/chosen": -368.0,
|
2825 |
+
"logps/rejected": -392.0,
|
2826 |
+
"loss": 0.0144,
|
2827 |
+
"rewards/accuracies": 1.0,
|
2828 |
+
"rewards/chosen": 2.125,
|
2829 |
+
"rewards/margins": 8.5,
|
2830 |
+
"rewards/rejected": -6.375,
|
2831 |
+
"step": 1850
|
2832 |
+
},
|
2833 |
+
{
|
2834 |
+
"epoch": 2.980769230769231,
|
2835 |
+
"grad_norm": 2.757504775523463,
|
2836 |
+
"learning_rate": 3.562945368171021e-09,
|
2837 |
+
"logits/chosen": -0.2314453125,
|
2838 |
+
"logits/rejected": -0.3515625,
|
2839 |
+
"logps/chosen": -366.0,
|
2840 |
+
"logps/rejected": -382.0,
|
2841 |
+
"loss": 0.0081,
|
2842 |
+
"rewards/accuracies": 1.0,
|
2843 |
+
"rewards/chosen": 2.3125,
|
2844 |
+
"rewards/margins": 8.125,
|
2845 |
+
"rewards/rejected": -5.84375,
|
2846 |
+
"step": 1860
|
2847 |
+
},
|
2848 |
+
{
|
2849 |
+
"epoch": 2.996794871794872,
|
2850 |
+
"grad_norm": 0.7690916064350508,
|
2851 |
+
"learning_rate": 5.938242280285036e-10,
|
2852 |
+
"logits/chosen": -0.146484375,
|
2853 |
+
"logits/rejected": -0.361328125,
|
2854 |
+
"logps/chosen": -352.0,
|
2855 |
+
"logps/rejected": -378.0,
|
2856 |
+
"loss": 0.0214,
|
2857 |
+
"rewards/accuracies": 1.0,
|
2858 |
+
"rewards/chosen": 2.9375,
|
2859 |
+
"rewards/margins": 8.75,
|
2860 |
+
"rewards/rejected": -5.84375,
|
2861 |
+
"step": 1870
|
2862 |
+
},
|
2863 |
+
{
|
2864 |
+
"epoch": 3.0,
|
2865 |
+
"eval_logits/chosen": -0.2392578125,
|
2866 |
+
"eval_logits/rejected": -0.2734375,
|
2867 |
+
"eval_logps/chosen": -364.0,
|
2868 |
+
"eval_logps/rejected": -370.0,
|
2869 |
+
"eval_loss": 0.21664032340049744,
|
2870 |
+
"eval_rewards/accuracies": 0.9134615659713745,
|
2871 |
+
"eval_rewards/chosen": 1.7734375,
|
2872 |
+
"eval_rewards/margins": 6.65625,
|
2873 |
+
"eval_rewards/rejected": -4.875,
|
2874 |
+
"eval_runtime": 26.3085,
|
2875 |
+
"eval_samples_per_second": 7.602,
|
2876 |
+
"eval_steps_per_second": 0.494,
|
2877 |
+
"step": 1872
|
2878 |
}
|
2879 |
],
|
2880 |
"logging_steps": 10,
|
|
|
2889 |
"should_evaluate": false,
|
2890 |
"should_log": false,
|
2891 |
"should_save": true,
|
2892 |
+
"should_training_stop": true
|
2893 |
},
|
2894 |
"attributes": {}
|
2895 |
}
|