AmberYifan committed
Commit 33e35b5 · verified · 1 Parent(s): 26bb42a

Training in progress, epoch 3, checkpoint

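The files below form the epoch-3 checkpoint at global step 1872: DeepSpeed ZeRO partitioned optimizer/model states under global_step1872/, four sharded safetensors weight files, per-rank RNG states, the LR scheduler state, and an updated trainer_state.json. As a minimal sketch of loading the committed weights (assuming a local clone of last-checkpoint/ and a causal-LM architecture; neither is confirmed by this commit):

import torch
from transformers import AutoModelForCausalLM

# Hypothetical local path to a clone of this repo's checkpoint directory.
ckpt_dir = "last-checkpoint"

# The model-0000*-of-00004.safetensors shards listed below are resolved via the
# checkpoint's index file; bf16 matches the bf16 ZeRO optimizer states above.
model = AutoModelForCausalLM.from_pretrained(ckpt_dir, torch_dtype=torch.bfloat16)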
last-checkpoint/global_step1872/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62c1b0737d9ef833cc5447393be9645063199f9c62e660078c52e845aa32896b
+ size 30462473157
last-checkpoint/global_step1872/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:817359d4e1fee825f428c471135c1a7e867c2eed7ed945e8a13b0997f00d4daa
+ size 30462473157
last-checkpoint/global_step1872/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f20044a320dfaceff8d5f009e24b0c20b18aa9918609113a30e834ccbce7a2a
+ size 168021
last-checkpoint/global_step1872/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e4c4a2977f7897858d681153444d2ebd90e00463a70a1038df61619876f7f93b
+ size 168021
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1248
+ global_step1872
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30dd02bdd9a1699ec1b6185608050346737c71cf556d27d7a82e5d9ce434fdd0
+ oid sha256:80fcbf5f63a031db5e7b7cc0059d8610275f60c9994a6affd53349d73d62303d
  size 4877660776
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:025ca15fd6ee18e66623c43e1b6715ec460b3c1db4b5ae83059f58b01f690c9f
+ oid sha256:b40d8045439d7259e9b7354ad01954826a7161688838b7467a7e317daee489fd
  size 4932751008
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:584a552a41e564fcd7f833dc0769480f6c7fd9cd9d71a3fe96f234305784179b
+ oid sha256:f96435a411a27508b63821508e0f9ac352278204b2c94d715d3f47b37f0c7e59
  size 4330865200
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6b7aff33fb6e1e771d13945f4cb5b44d4da73a682d5a6e9366a6386df2908523
+ oid sha256:f1cad27ebfd179a88097c9008af3e226adae1d17acab641c6855cd3bc85c3d7d
  size 1089994880
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9698021f2d84167912e7be6ba48d3d2b8d6b20894f23319f36df078c03b33a64
+ oid sha256:95430508d31bbe1a66a940e2572bf04addefae3e1c4e861e8657f66d302aa23e
  size 14768
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:90a140d1d010220b1679bf6e519f8d3d518cb57331e0e7fb30008dc00e427811
+ oid sha256:b70a6983e6085768b4d2b447a8bd35374bce9cf4ea8e8fbefc1260ca2e054a70
  size 14768
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b3b7bf841d31595f02eb1941a019ed4bf66f16ec16e6c5eb963a853209da5c2f
+ oid sha256:2b15b652e34e702b048a2fe65a8149c25795b8f29765f41806646e987f007a10
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.0,
+ "epoch": 3.0,
  "eval_steps": 500,
- "global_step": 1248,
+ "global_step": 1872,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1914,6 +1914,967 @@
1914
  "eval_samples_per_second": 6.892,
1915
  "eval_steps_per_second": 0.448,
1916
  "step": 1248
1917
+ },
1918
+ {
1919
+ "epoch": 2.003205128205128,
1920
+ "grad_norm": 2.3679308142140885,
1921
+ "learning_rate": 1.846793349168646e-07,
1922
+ "logits/chosen": -0.341796875,
1923
+ "logits/rejected": -0.46484375,
1924
+ "logps/chosen": -366.0,
1925
+ "logps/rejected": -352.0,
1926
+ "loss": 0.0315,
1927
+ "rewards/accuracies": 0.987500011920929,
1928
+ "rewards/chosen": 2.609375,
1929
+ "rewards/margins": 6.9375,
1930
+ "rewards/rejected": -4.3125,
1931
+ "step": 1250
1932
+ },
1933
+ {
1934
+ "epoch": 2.019230769230769,
1935
+ "grad_norm": 2.590719502204235,
1936
+ "learning_rate": 1.8171021377672207e-07,
1937
+ "logits/chosen": -0.25,
1938
+ "logits/rejected": -0.404296875,
1939
+ "logps/chosen": -378.0,
1940
+ "logps/rejected": -364.0,
1941
+ "loss": 0.0081,
1942
+ "rewards/accuracies": 1.0,
1943
+ "rewards/chosen": 2.015625,
1944
+ "rewards/margins": 6.90625,
1945
+ "rewards/rejected": -4.875,
1946
+ "step": 1260
1947
+ },
1948
+ {
1949
+ "epoch": 2.03525641025641,
1950
+ "grad_norm": 22.175507349451703,
1951
+ "learning_rate": 1.7874109263657958e-07,
1952
+ "logits/chosen": -0.2275390625,
1953
+ "logits/rejected": -0.37890625,
1954
+ "logps/chosen": -364.0,
1955
+ "logps/rejected": -384.0,
1956
+ "loss": 0.0156,
1957
+ "rewards/accuracies": 0.987500011920929,
1958
+ "rewards/chosen": 2.0625,
1959
+ "rewards/margins": 7.375,
1960
+ "rewards/rejected": -5.3125,
1961
+ "step": 1270
1962
+ },
1963
+ {
1964
+ "epoch": 2.051282051282051,
1965
+ "grad_norm": 4.568251724068892,
1966
+ "learning_rate": 1.7577197149643706e-07,
1967
+ "logits/chosen": -0.25,
1968
+ "logits/rejected": -0.482421875,
1969
+ "logps/chosen": -380.0,
1970
+ "logps/rejected": -370.0,
1971
+ "loss": 0.0145,
1972
+ "rewards/accuracies": 1.0,
1973
+ "rewards/chosen": 1.953125,
1974
+ "rewards/margins": 7.46875,
1975
+ "rewards/rejected": -5.53125,
1976
+ "step": 1280
1977
+ },
1978
+ {
1979
+ "epoch": 2.0673076923076925,
1980
+ "grad_norm": 4.904685892165479,
1981
+ "learning_rate": 1.728028503562945e-07,
1982
+ "logits/chosen": -0.22265625,
1983
+ "logits/rejected": -0.466796875,
1984
+ "logps/chosen": -376.0,
1985
+ "logps/rejected": -366.0,
1986
+ "loss": 0.0088,
1987
+ "rewards/accuracies": 1.0,
1988
+ "rewards/chosen": 2.515625,
1989
+ "rewards/margins": 7.3125,
1990
+ "rewards/rejected": -4.78125,
1991
+ "step": 1290
1992
+ },
1993
+ {
1994
+ "epoch": 2.0833333333333335,
1995
+ "grad_norm": 0.6187256370987634,
1996
+ "learning_rate": 1.6983372921615202e-07,
1997
+ "logits/chosen": -0.2138671875,
1998
+ "logits/rejected": -0.458984375,
1999
+ "logps/chosen": -362.0,
2000
+ "logps/rejected": -370.0,
2001
+ "loss": 0.0123,
2002
+ "rewards/accuracies": 1.0,
2003
+ "rewards/chosen": 2.265625,
2004
+ "rewards/margins": 7.625,
2005
+ "rewards/rejected": -5.375,
2006
+ "step": 1300
2007
+ },
2008
+ {
2009
+ "epoch": 2.0993589743589745,
2010
+ "grad_norm": 0.7331904803685036,
2011
+ "learning_rate": 1.668646080760095e-07,
2012
+ "logits/chosen": -0.15234375,
2013
+ "logits/rejected": -0.40625,
2014
+ "logps/chosen": -354.0,
2015
+ "logps/rejected": -356.0,
2016
+ "loss": 0.0061,
2017
+ "rewards/accuracies": 1.0,
2018
+ "rewards/chosen": 2.59375,
2019
+ "rewards/margins": 7.65625,
2020
+ "rewards/rejected": -5.0625,
2021
+ "step": 1310
2022
+ },
2023
+ {
2024
+ "epoch": 2.1153846153846154,
2025
+ "grad_norm": 2.0296944258203036,
2026
+ "learning_rate": 1.6389548693586697e-07,
2027
+ "logits/chosen": -0.2412109375,
2028
+ "logits/rejected": -0.1748046875,
2029
+ "logps/chosen": -348.0,
2030
+ "logps/rejected": -348.0,
2031
+ "loss": 0.0231,
2032
+ "rewards/accuracies": 0.987500011920929,
2033
+ "rewards/chosen": 2.484375,
2034
+ "rewards/margins": 7.125,
2035
+ "rewards/rejected": -4.625,
2036
+ "step": 1320
2037
+ },
2038
+ {
2039
+ "epoch": 2.1314102564102564,
2040
+ "grad_norm": 23.71632295557207,
2041
+ "learning_rate": 1.6092636579572448e-07,
2042
+ "logits/chosen": -0.2275390625,
2043
+ "logits/rejected": -0.3125,
2044
+ "logps/chosen": -358.0,
2045
+ "logps/rejected": -342.0,
2046
+ "loss": 0.019,
2047
+ "rewards/accuracies": 1.0,
2048
+ "rewards/chosen": 2.78125,
2049
+ "rewards/margins": 7.90625,
2050
+ "rewards/rejected": -5.125,
2051
+ "step": 1330
2052
+ },
2053
+ {
2054
+ "epoch": 2.1474358974358974,
2055
+ "grad_norm": 4.284093803737362,
2056
+ "learning_rate": 1.5795724465558193e-07,
2057
+ "logits/chosen": -0.2255859375,
2058
+ "logits/rejected": -0.5546875,
2059
+ "logps/chosen": -364.0,
2060
+ "logps/rejected": -384.0,
2061
+ "loss": 0.0159,
2062
+ "rewards/accuracies": 1.0,
2063
+ "rewards/chosen": 2.484375,
2064
+ "rewards/margins": 7.4375,
2065
+ "rewards/rejected": -4.9375,
2066
+ "step": 1340
2067
+ },
2068
+ {
2069
+ "epoch": 2.1634615384615383,
2070
+ "grad_norm": 0.7862983762455222,
2071
+ "learning_rate": 1.549881235154394e-07,
2072
+ "logits/chosen": -0.1435546875,
2073
+ "logits/rejected": -0.337890625,
2074
+ "logps/chosen": -376.0,
2075
+ "logps/rejected": -354.0,
2076
+ "loss": 0.0114,
2077
+ "rewards/accuracies": 1.0,
2078
+ "rewards/chosen": 2.828125,
2079
+ "rewards/margins": 7.40625,
2080
+ "rewards/rejected": -4.59375,
2081
+ "step": 1350
2082
+ },
2083
+ {
2084
+ "epoch": 2.1794871794871793,
2085
+ "grad_norm": 1.8750426567792726,
2086
+ "learning_rate": 1.520190023752969e-07,
2087
+ "logits/chosen": -0.259765625,
2088
+ "logits/rejected": -0.59765625,
2089
+ "logps/chosen": -364.0,
2090
+ "logps/rejected": -330.0,
2091
+ "loss": 0.0138,
2092
+ "rewards/accuracies": 1.0,
2093
+ "rewards/chosen": 3.015625,
2094
+ "rewards/margins": 8.1875,
2095
+ "rewards/rejected": -5.15625,
2096
+ "step": 1360
2097
+ },
2098
+ {
2099
+ "epoch": 2.1955128205128207,
2100
+ "grad_norm": 7.170950559094129,
2101
+ "learning_rate": 1.490498812351544e-07,
2102
+ "logits/chosen": -0.251953125,
2103
+ "logits/rejected": -0.36328125,
2104
+ "logps/chosen": -362.0,
2105
+ "logps/rejected": -356.0,
2106
+ "loss": 0.0323,
2107
+ "rewards/accuracies": 1.0,
2108
+ "rewards/chosen": 3.0,
2109
+ "rewards/margins": 8.1875,
2110
+ "rewards/rejected": -5.1875,
2111
+ "step": 1370
2112
+ },
2113
+ {
2114
+ "epoch": 2.2115384615384617,
2115
+ "grad_norm": 21.23713813309802,
2116
+ "learning_rate": 1.4608076009501184e-07,
2117
+ "logits/chosen": -0.2431640625,
2118
+ "logits/rejected": -0.2353515625,
2119
+ "logps/chosen": -366.0,
2120
+ "logps/rejected": -356.0,
2121
+ "loss": 0.024,
2122
+ "rewards/accuracies": 1.0,
2123
+ "rewards/chosen": 2.6875,
2124
+ "rewards/margins": 7.8125,
2125
+ "rewards/rejected": -5.125,
2126
+ "step": 1380
2127
+ },
2128
+ {
2129
+ "epoch": 2.2275641025641026,
2130
+ "grad_norm": 7.169471258104354,
2131
+ "learning_rate": 1.4311163895486935e-07,
2132
+ "logits/chosen": -0.18359375,
2133
+ "logits/rejected": -0.29296875,
2134
+ "logps/chosen": -348.0,
2135
+ "logps/rejected": -364.0,
2136
+ "loss": 0.0257,
2137
+ "rewards/accuracies": 1.0,
2138
+ "rewards/chosen": 2.90625,
2139
+ "rewards/margins": 8.1875,
2140
+ "rewards/rejected": -5.25,
2141
+ "step": 1390
2142
+ },
2143
+ {
2144
+ "epoch": 2.2435897435897436,
2145
+ "grad_norm": 4.891546899421327,
2146
+ "learning_rate": 1.4014251781472683e-07,
2147
+ "logits/chosen": -0.22265625,
2148
+ "logits/rejected": -0.55078125,
2149
+ "logps/chosen": -344.0,
2150
+ "logps/rejected": -374.0,
2151
+ "loss": 0.0165,
2152
+ "rewards/accuracies": 0.987500011920929,
2153
+ "rewards/chosen": 2.71875,
2154
+ "rewards/margins": 7.4375,
2155
+ "rewards/rejected": -4.71875,
2156
+ "step": 1400
2157
+ },
2158
+ {
2159
+ "epoch": 2.2596153846153846,
2160
+ "grad_norm": 14.0088561337579,
2161
+ "learning_rate": 1.3717339667458433e-07,
2162
+ "logits/chosen": -0.171875,
2163
+ "logits/rejected": -0.375,
2164
+ "logps/chosen": -336.0,
2165
+ "logps/rejected": -380.0,
2166
+ "loss": 0.023,
2167
+ "rewards/accuracies": 1.0,
2168
+ "rewards/chosen": 2.328125,
2169
+ "rewards/margins": 8.125,
2170
+ "rewards/rejected": -5.8125,
2171
+ "step": 1410
2172
+ },
2173
+ {
2174
+ "epoch": 2.2756410256410255,
2175
+ "grad_norm": 5.478498841262275,
2176
+ "learning_rate": 1.342042755344418e-07,
2177
+ "logits/chosen": -0.244140625,
2178
+ "logits/rejected": -0.60546875,
2179
+ "logps/chosen": -340.0,
2180
+ "logps/rejected": -368.0,
2181
+ "loss": 0.0101,
2182
+ "rewards/accuracies": 1.0,
2183
+ "rewards/chosen": 2.078125,
2184
+ "rewards/margins": 7.125,
2185
+ "rewards/rejected": -5.0625,
2186
+ "step": 1420
2187
+ },
2188
+ {
2189
+ "epoch": 2.2916666666666665,
2190
+ "grad_norm": 4.08870815918569,
2191
+ "learning_rate": 1.3123515439429926e-07,
2192
+ "logits/chosen": -0.216796875,
2193
+ "logits/rejected": -0.5,
2194
+ "logps/chosen": -348.0,
2195
+ "logps/rejected": -380.0,
2196
+ "loss": 0.0094,
2197
+ "rewards/accuracies": 1.0,
2198
+ "rewards/chosen": 2.40625,
2199
+ "rewards/margins": 7.875,
2200
+ "rewards/rejected": -5.46875,
2201
+ "step": 1430
2202
+ },
2203
+ {
2204
+ "epoch": 2.3076923076923075,
2205
+ "grad_norm": 4.0828939935609885,
2206
+ "learning_rate": 1.2826603325415677e-07,
2207
+ "logits/chosen": -0.310546875,
2208
+ "logits/rejected": -0.3515625,
2209
+ "logps/chosen": -362.0,
2210
+ "logps/rejected": -372.0,
2211
+ "loss": 0.0115,
2212
+ "rewards/accuracies": 1.0,
2213
+ "rewards/chosen": 1.9296875,
2214
+ "rewards/margins": 7.90625,
2215
+ "rewards/rejected": -5.96875,
2216
+ "step": 1440
2217
+ },
2218
+ {
2219
+ "epoch": 2.323717948717949,
2220
+ "grad_norm": 2.2611774310220714,
2221
+ "learning_rate": 1.2529691211401425e-07,
2222
+ "logits/chosen": -0.2890625,
2223
+ "logits/rejected": -0.3671875,
2224
+ "logps/chosen": -350.0,
2225
+ "logps/rejected": -380.0,
2226
+ "loss": 0.0071,
2227
+ "rewards/accuracies": 1.0,
2228
+ "rewards/chosen": 2.359375,
2229
+ "rewards/margins": 8.1875,
2230
+ "rewards/rejected": -5.84375,
2231
+ "step": 1450
2232
+ },
2233
+ {
2234
+ "epoch": 2.33974358974359,
2235
+ "grad_norm": 35.51723177781465,
2236
+ "learning_rate": 1.2232779097387173e-07,
2237
+ "logits/chosen": -0.244140625,
2238
+ "logits/rejected": -0.36328125,
2239
+ "logps/chosen": -378.0,
2240
+ "logps/rejected": -368.0,
2241
+ "loss": 0.0773,
2242
+ "rewards/accuracies": 1.0,
2243
+ "rewards/chosen": 2.078125,
2244
+ "rewards/margins": 8.125,
2245
+ "rewards/rejected": -6.0,
2246
+ "step": 1460
2247
+ },
2248
+ {
2249
+ "epoch": 2.355769230769231,
2250
+ "grad_norm": 5.534009913713299,
2251
+ "learning_rate": 1.193586698337292e-07,
2252
+ "logits/chosen": -0.1767578125,
2253
+ "logits/rejected": -0.5390625,
2254
+ "logps/chosen": -368.0,
2255
+ "logps/rejected": -358.0,
2256
+ "loss": 0.012,
2257
+ "rewards/accuracies": 1.0,
2258
+ "rewards/chosen": 2.484375,
2259
+ "rewards/margins": 8.0625,
2260
+ "rewards/rejected": -5.59375,
2261
+ "step": 1470
2262
+ },
2263
+ {
2264
+ "epoch": 2.371794871794872,
2265
+ "grad_norm": 18.31189968205467,
2266
+ "learning_rate": 1.163895486935867e-07,
2267
+ "logits/chosen": -0.25390625,
2268
+ "logits/rejected": -0.353515625,
2269
+ "logps/chosen": -368.0,
2270
+ "logps/rejected": -356.0,
2271
+ "loss": 0.0359,
2272
+ "rewards/accuracies": 0.9750000238418579,
2273
+ "rewards/chosen": 2.3125,
2274
+ "rewards/margins": 7.78125,
2275
+ "rewards/rejected": -5.46875,
2276
+ "step": 1480
2277
+ },
2278
+ {
2279
+ "epoch": 2.3878205128205128,
2280
+ "grad_norm": 14.190561755907082,
2281
+ "learning_rate": 1.1342042755344417e-07,
2282
+ "logits/chosen": -0.1484375,
2283
+ "logits/rejected": -0.35546875,
2284
+ "logps/chosen": -376.0,
2285
+ "logps/rejected": -370.0,
2286
+ "loss": 0.0065,
2287
+ "rewards/accuracies": 1.0,
2288
+ "rewards/chosen": 2.734375,
2289
+ "rewards/margins": 8.4375,
2290
+ "rewards/rejected": -5.6875,
2291
+ "step": 1490
2292
+ },
2293
+ {
2294
+ "epoch": 2.4038461538461537,
2295
+ "grad_norm": 57.38633989383038,
2296
+ "learning_rate": 1.1045130641330165e-07,
2297
+ "logits/chosen": -0.2177734375,
2298
+ "logits/rejected": -0.24609375,
2299
+ "logps/chosen": -356.0,
2300
+ "logps/rejected": -376.0,
2301
+ "loss": 0.0762,
2302
+ "rewards/accuracies": 0.949999988079071,
2303
+ "rewards/chosen": 1.25,
2304
+ "rewards/margins": 7.25,
2305
+ "rewards/rejected": -6.0,
2306
+ "step": 1500
2307
+ },
2308
+ {
2309
+ "epoch": 2.4198717948717947,
2310
+ "grad_norm": 13.945337786807688,
2311
+ "learning_rate": 1.0748218527315913e-07,
2312
+ "logits/chosen": -0.2353515625,
2313
+ "logits/rejected": -0.486328125,
2314
+ "logps/chosen": -374.0,
2315
+ "logps/rejected": -386.0,
2316
+ "loss": 0.0146,
2317
+ "rewards/accuracies": 1.0,
2318
+ "rewards/chosen": 2.28125,
2319
+ "rewards/margins": 8.625,
2320
+ "rewards/rejected": -6.34375,
2321
+ "step": 1510
2322
+ },
2323
+ {
2324
+ "epoch": 2.435897435897436,
2325
+ "grad_norm": 2.025562206086781,
2326
+ "learning_rate": 1.0451306413301662e-07,
2327
+ "logits/chosen": -0.2890625,
2328
+ "logits/rejected": -0.404296875,
2329
+ "logps/chosen": -362.0,
2330
+ "logps/rejected": -360.0,
2331
+ "loss": 0.0058,
2332
+ "rewards/accuracies": 1.0,
2333
+ "rewards/chosen": 2.96875,
2334
+ "rewards/margins": 8.875,
2335
+ "rewards/rejected": -5.90625,
2336
+ "step": 1520
2337
+ },
2338
+ {
2339
+ "epoch": 2.451923076923077,
2340
+ "grad_norm": 2.5818458781278997,
2341
+ "learning_rate": 1.0154394299287411e-07,
2342
+ "logits/chosen": -0.20703125,
2343
+ "logits/rejected": -0.380859375,
2344
+ "logps/chosen": -356.0,
2345
+ "logps/rejected": -378.0,
2346
+ "loss": 0.0084,
2347
+ "rewards/accuracies": 1.0,
2348
+ "rewards/chosen": 2.75,
2349
+ "rewards/margins": 8.375,
2350
+ "rewards/rejected": -5.59375,
2351
+ "step": 1530
2352
+ },
2353
+ {
2354
+ "epoch": 2.467948717948718,
2355
+ "grad_norm": 0.5147935469188503,
2356
+ "learning_rate": 9.857482185273158e-08,
2357
+ "logits/chosen": -0.240234375,
2358
+ "logits/rejected": -0.47265625,
2359
+ "logps/chosen": -348.0,
2360
+ "logps/rejected": -354.0,
2361
+ "loss": 0.0156,
2362
+ "rewards/accuracies": 1.0,
2363
+ "rewards/chosen": 2.609375,
2364
+ "rewards/margins": 8.125,
2365
+ "rewards/rejected": -5.46875,
2366
+ "step": 1540
2367
+ },
2368
+ {
2369
+ "epoch": 2.483974358974359,
2370
+ "grad_norm": 2.5527476910216764,
2371
+ "learning_rate": 9.560570071258907e-08,
2372
+ "logits/chosen": -0.1748046875,
2373
+ "logits/rejected": -0.392578125,
2374
+ "logps/chosen": -378.0,
2375
+ "logps/rejected": -336.0,
2376
+ "loss": 0.0132,
2377
+ "rewards/accuracies": 1.0,
2378
+ "rewards/chosen": 2.34375,
2379
+ "rewards/margins": 8.125,
2380
+ "rewards/rejected": -5.8125,
2381
+ "step": 1550
2382
+ },
2383
+ {
2384
+ "epoch": 2.5,
2385
+ "grad_norm": 2.6084669807533807,
2386
+ "learning_rate": 9.263657957244655e-08,
2387
+ "logits/chosen": -0.28125,
2388
+ "logits/rejected": -0.40234375,
2389
+ "logps/chosen": -340.0,
2390
+ "logps/rejected": -386.0,
2391
+ "loss": 0.0112,
2392
+ "rewards/accuracies": 0.987500011920929,
2393
+ "rewards/chosen": 2.78125,
2394
+ "rewards/margins": 8.4375,
2395
+ "rewards/rejected": -5.6875,
2396
+ "step": 1560
2397
+ },
2398
+ {
2399
+ "epoch": 2.516025641025641,
2400
+ "grad_norm": 0.5142214115866551,
2401
+ "learning_rate": 8.966745843230403e-08,
2402
+ "logits/chosen": -0.26953125,
2403
+ "logits/rejected": -0.34765625,
2404
+ "logps/chosen": -332.0,
2405
+ "logps/rejected": -338.0,
2406
+ "loss": 0.0087,
2407
+ "rewards/accuracies": 1.0,
2408
+ "rewards/chosen": 2.625,
2409
+ "rewards/margins": 8.25,
2410
+ "rewards/rejected": -5.625,
2411
+ "step": 1570
2412
+ },
2413
+ {
2414
+ "epoch": 2.532051282051282,
2415
+ "grad_norm": 0.4686133876015494,
2416
+ "learning_rate": 8.669833729216151e-08,
2417
+ "logits/chosen": -0.2080078125,
2418
+ "logits/rejected": -0.36328125,
2419
+ "logps/chosen": -374.0,
2420
+ "logps/rejected": -382.0,
2421
+ "loss": 0.0066,
2422
+ "rewards/accuracies": 1.0,
2423
+ "rewards/chosen": 2.0625,
2424
+ "rewards/margins": 7.84375,
2425
+ "rewards/rejected": -5.78125,
2426
+ "step": 1580
2427
+ },
2428
+ {
2429
+ "epoch": 2.5480769230769234,
2430
+ "grad_norm": 4.377458752115824,
2431
+ "learning_rate": 8.3729216152019e-08,
2432
+ "logits/chosen": -0.240234375,
2433
+ "logits/rejected": -0.50390625,
2434
+ "logps/chosen": -358.0,
2435
+ "logps/rejected": -370.0,
2436
+ "loss": 0.0077,
2437
+ "rewards/accuracies": 1.0,
2438
+ "rewards/chosen": 2.375,
2439
+ "rewards/margins": 8.4375,
2440
+ "rewards/rejected": -6.03125,
2441
+ "step": 1590
2442
+ },
2443
+ {
2444
+ "epoch": 2.564102564102564,
2445
+ "grad_norm": 8.575422939319253,
2446
+ "learning_rate": 8.076009501187649e-08,
2447
+ "logits/chosen": -0.2490234375,
2448
+ "logits/rejected": -0.337890625,
2449
+ "logps/chosen": -348.0,
2450
+ "logps/rejected": -370.0,
2451
+ "loss": 0.0241,
2452
+ "rewards/accuracies": 0.987500011920929,
2453
+ "rewards/chosen": 2.484375,
2454
+ "rewards/margins": 7.5,
2455
+ "rewards/rejected": -5.0,
2456
+ "step": 1600
2457
+ },
2458
+ {
2459
+ "epoch": 2.5801282051282053,
2460
+ "grad_norm": 2.551052943978534,
2461
+ "learning_rate": 7.779097387173396e-08,
2462
+ "logits/chosen": -0.2734375,
2463
+ "logits/rejected": -0.375,
2464
+ "logps/chosen": -342.0,
2465
+ "logps/rejected": -360.0,
2466
+ "loss": 0.0082,
2467
+ "rewards/accuracies": 1.0,
2468
+ "rewards/chosen": 2.8125,
2469
+ "rewards/margins": 7.8125,
2470
+ "rewards/rejected": -4.96875,
2471
+ "step": 1610
2472
+ },
2473
+ {
2474
+ "epoch": 2.5961538461538463,
2475
+ "grad_norm": 6.000172766868698,
2476
+ "learning_rate": 7.482185273159145e-08,
2477
+ "logits/chosen": -0.263671875,
2478
+ "logits/rejected": -0.451171875,
2479
+ "logps/chosen": -368.0,
2480
+ "logps/rejected": -378.0,
2481
+ "loss": 0.0303,
2482
+ "rewards/accuracies": 1.0,
2483
+ "rewards/chosen": 2.46875,
2484
+ "rewards/margins": 7.96875,
2485
+ "rewards/rejected": -5.5,
2486
+ "step": 1620
2487
+ },
2488
+ {
2489
+ "epoch": 2.6121794871794872,
2490
+ "grad_norm": 19.722033841376266,
2491
+ "learning_rate": 7.185273159144893e-08,
2492
+ "logits/chosen": -0.294921875,
2493
+ "logits/rejected": -0.392578125,
2494
+ "logps/chosen": -366.0,
2495
+ "logps/rejected": -398.0,
2496
+ "loss": 0.0123,
2497
+ "rewards/accuracies": 1.0,
2498
+ "rewards/chosen": 2.71875,
2499
+ "rewards/margins": 8.25,
2500
+ "rewards/rejected": -5.53125,
2501
+ "step": 1630
2502
+ },
2503
+ {
2504
+ "epoch": 2.628205128205128,
2505
+ "grad_norm": 11.30540085696599,
2506
+ "learning_rate": 6.88836104513064e-08,
2507
+ "logits/chosen": -0.18359375,
2508
+ "logits/rejected": -0.314453125,
2509
+ "logps/chosen": -380.0,
2510
+ "logps/rejected": -354.0,
2511
+ "loss": 0.0078,
2512
+ "rewards/accuracies": 1.0,
2513
+ "rewards/chosen": 2.734375,
2514
+ "rewards/margins": 8.6875,
2515
+ "rewards/rejected": -5.96875,
2516
+ "step": 1640
2517
+ },
2518
+ {
2519
+ "epoch": 2.644230769230769,
2520
+ "grad_norm": 1.1773482574998353,
2521
+ "learning_rate": 6.591448931116388e-08,
2522
+ "logits/chosen": -0.162109375,
2523
+ "logits/rejected": -0.4609375,
2524
+ "logps/chosen": -378.0,
2525
+ "logps/rejected": -374.0,
2526
+ "loss": 0.0175,
2527
+ "rewards/accuracies": 0.9750000238418579,
2528
+ "rewards/chosen": 2.125,
2529
+ "rewards/margins": 8.0625,
2530
+ "rewards/rejected": -5.9375,
2531
+ "step": 1650
2532
+ },
2533
+ {
2534
+ "epoch": 2.66025641025641,
2535
+ "grad_norm": 52.67450880620409,
2536
+ "learning_rate": 6.294536817102138e-08,
2537
+ "logits/chosen": -0.294921875,
2538
+ "logits/rejected": -0.58203125,
2539
+ "logps/chosen": -386.0,
2540
+ "logps/rejected": -376.0,
2541
+ "loss": 0.0316,
2542
+ "rewards/accuracies": 0.987500011920929,
2543
+ "rewards/chosen": 1.828125,
2544
+ "rewards/margins": 7.53125,
2545
+ "rewards/rejected": -5.6875,
2546
+ "step": 1660
2547
+ },
2548
+ {
2549
+ "epoch": 2.676282051282051,
2550
+ "grad_norm": 2.444649208747581,
2551
+ "learning_rate": 5.997624703087885e-08,
2552
+ "logits/chosen": -0.2734375,
2553
+ "logits/rejected": -0.30859375,
2554
+ "logps/chosen": -348.0,
2555
+ "logps/rejected": -330.0,
2556
+ "loss": 0.0139,
2557
+ "rewards/accuracies": 1.0,
2558
+ "rewards/chosen": 2.34375,
2559
+ "rewards/margins": 7.96875,
2560
+ "rewards/rejected": -5.625,
2561
+ "step": 1670
2562
+ },
2563
+ {
2564
+ "epoch": 2.6923076923076925,
2565
+ "grad_norm": 1.8309710467577502,
2566
+ "learning_rate": 5.700712589073634e-08,
2567
+ "logits/chosen": -0.33203125,
2568
+ "logits/rejected": -0.5546875,
2569
+ "logps/chosen": -368.0,
2570
+ "logps/rejected": -372.0,
2571
+ "loss": 0.0044,
2572
+ "rewards/accuracies": 1.0,
2573
+ "rewards/chosen": 2.578125,
2574
+ "rewards/margins": 8.3125,
2575
+ "rewards/rejected": -5.75,
2576
+ "step": 1680
2577
+ },
2578
+ {
2579
+ "epoch": 2.7083333333333335,
2580
+ "grad_norm": 0.974183927512936,
2581
+ "learning_rate": 5.4038004750593824e-08,
2582
+ "logits/chosen": -0.212890625,
2583
+ "logits/rejected": -0.384765625,
2584
+ "logps/chosen": -370.0,
2585
+ "logps/rejected": -366.0,
2586
+ "loss": 0.0061,
2587
+ "rewards/accuracies": 1.0,
2588
+ "rewards/chosen": 2.4375,
2589
+ "rewards/margins": 8.625,
2590
+ "rewards/rejected": -6.21875,
2591
+ "step": 1690
2592
+ },
2593
+ {
2594
+ "epoch": 2.7243589743589745,
2595
+ "grad_norm": 8.112271810670284,
2596
+ "learning_rate": 5.10688836104513e-08,
2597
+ "logits/chosen": -0.287109375,
2598
+ "logits/rejected": -0.34375,
2599
+ "logps/chosen": -364.0,
2600
+ "logps/rejected": -384.0,
2601
+ "loss": 0.0074,
2602
+ "rewards/accuracies": 1.0,
2603
+ "rewards/chosen": 2.53125,
2604
+ "rewards/margins": 8.625,
2605
+ "rewards/rejected": -6.09375,
2606
+ "step": 1700
2607
+ },
2608
+ {
2609
+ "epoch": 2.7403846153846154,
2610
+ "grad_norm": 48.323303487774965,
2611
+ "learning_rate": 4.809976247030879e-08,
2612
+ "logits/chosen": -0.2734375,
2613
+ "logits/rejected": -0.39453125,
2614
+ "logps/chosen": -378.0,
2615
+ "logps/rejected": -368.0,
2616
+ "loss": 0.0187,
2617
+ "rewards/accuracies": 1.0,
2618
+ "rewards/chosen": 2.28125,
2619
+ "rewards/margins": 7.9375,
2620
+ "rewards/rejected": -5.65625,
2621
+ "step": 1710
2622
+ },
2623
+ {
2624
+ "epoch": 2.7564102564102564,
2625
+ "grad_norm": 0.6216279700601488,
2626
+ "learning_rate": 4.5130641330166267e-08,
2627
+ "logits/chosen": -0.2470703125,
2628
+ "logits/rejected": -0.3828125,
2629
+ "logps/chosen": -340.0,
2630
+ "logps/rejected": -372.0,
2631
+ "loss": 0.0305,
2632
+ "rewards/accuracies": 0.987500011920929,
2633
+ "rewards/chosen": 2.25,
2634
+ "rewards/margins": 7.6875,
2635
+ "rewards/rejected": -5.4375,
2636
+ "step": 1720
2637
+ },
2638
+ {
2639
+ "epoch": 2.7724358974358974,
2640
+ "grad_norm": 8.386988307728698,
2641
+ "learning_rate": 4.216152019002375e-08,
2642
+ "logits/chosen": -0.2060546875,
2643
+ "logits/rejected": -0.349609375,
2644
+ "logps/chosen": -360.0,
2645
+ "logps/rejected": -364.0,
2646
+ "loss": 0.0062,
2647
+ "rewards/accuracies": 1.0,
2648
+ "rewards/chosen": 2.734375,
2649
+ "rewards/margins": 8.4375,
2650
+ "rewards/rejected": -5.6875,
2651
+ "step": 1730
2652
+ },
2653
+ {
2654
+ "epoch": 2.7884615384615383,
2655
+ "grad_norm": 2.6310329285858587,
2656
+ "learning_rate": 3.919239904988123e-08,
2657
+ "logits/chosen": -0.1826171875,
2658
+ "logits/rejected": -0.333984375,
2659
+ "logps/chosen": -358.0,
2660
+ "logps/rejected": -390.0,
2661
+ "loss": 0.0088,
2662
+ "rewards/accuracies": 1.0,
2663
+ "rewards/chosen": 2.28125,
2664
+ "rewards/margins": 7.875,
2665
+ "rewards/rejected": -5.59375,
2666
+ "step": 1740
2667
+ },
2668
+ {
2669
+ "epoch": 2.8044871794871797,
2670
+ "grad_norm": 3.066349322026837,
2671
+ "learning_rate": 3.6223277909738715e-08,
2672
+ "logits/chosen": -0.2236328125,
2673
+ "logits/rejected": -0.357421875,
2674
+ "logps/chosen": -350.0,
2675
+ "logps/rejected": -368.0,
2676
+ "loss": 0.0092,
2677
+ "rewards/accuracies": 1.0,
2678
+ "rewards/chosen": 2.40625,
2679
+ "rewards/margins": 7.96875,
2680
+ "rewards/rejected": -5.5625,
2681
+ "step": 1750
2682
+ },
2683
+ {
2684
+ "epoch": 2.8205128205128203,
2685
+ "grad_norm": 8.079242737705172,
2686
+ "learning_rate": 3.32541567695962e-08,
2687
+ "logits/chosen": -0.0966796875,
2688
+ "logits/rejected": -0.2431640625,
2689
+ "logps/chosen": -342.0,
2690
+ "logps/rejected": -360.0,
2691
+ "loss": 0.0237,
2692
+ "rewards/accuracies": 1.0,
2693
+ "rewards/chosen": 2.421875,
2694
+ "rewards/margins": 8.4375,
2695
+ "rewards/rejected": -6.03125,
2696
+ "step": 1760
2697
+ },
2698
+ {
2699
+ "epoch": 2.8365384615384617,
2700
+ "grad_norm": 36.85905904660706,
2701
+ "learning_rate": 3.028503562945368e-08,
2702
+ "logits/chosen": -0.1962890625,
2703
+ "logits/rejected": -0.1435546875,
2704
+ "logps/chosen": -376.0,
2705
+ "logps/rejected": -366.0,
2706
+ "loss": 0.0138,
2707
+ "rewards/accuracies": 1.0,
2708
+ "rewards/chosen": 3.015625,
2709
+ "rewards/margins": 8.75,
2710
+ "rewards/rejected": -5.71875,
2711
+ "step": 1770
2712
+ },
2713
+ {
2714
+ "epoch": 2.8525641025641026,
2715
+ "grad_norm": 1.3700769518291278,
2716
+ "learning_rate": 2.7315914489311164e-08,
2717
+ "logits/chosen": -0.177734375,
2718
+ "logits/rejected": -0.29296875,
2719
+ "logps/chosen": -354.0,
2720
+ "logps/rejected": -366.0,
2721
+ "loss": 0.0047,
2722
+ "rewards/accuracies": 1.0,
2723
+ "rewards/chosen": 2.1875,
2724
+ "rewards/margins": 8.375,
2725
+ "rewards/rejected": -6.15625,
2726
+ "step": 1780
2727
+ },
2728
+ {
2729
+ "epoch": 2.8685897435897436,
2730
+ "grad_norm": 2.5772413830882437,
2731
+ "learning_rate": 2.4346793349168646e-08,
2732
+ "logits/chosen": -0.3515625,
2733
+ "logits/rejected": -0.40625,
2734
+ "logps/chosen": -356.0,
2735
+ "logps/rejected": -380.0,
2736
+ "loss": 0.0071,
2737
+ "rewards/accuracies": 1.0,
2738
+ "rewards/chosen": 2.265625,
2739
+ "rewards/margins": 8.25,
2740
+ "rewards/rejected": -5.96875,
2741
+ "step": 1790
2742
+ },
2743
+ {
2744
+ "epoch": 2.8846153846153846,
2745
+ "grad_norm": 0.2522421688634616,
2746
+ "learning_rate": 2.1377672209026125e-08,
2747
+ "logits/chosen": -0.1884765625,
2748
+ "logits/rejected": -0.296875,
2749
+ "logps/chosen": -354.0,
2750
+ "logps/rejected": -362.0,
2751
+ "loss": 0.0155,
2752
+ "rewards/accuracies": 0.987500011920929,
2753
+ "rewards/chosen": 2.609375,
2754
+ "rewards/margins": 8.625,
2755
+ "rewards/rejected": -5.96875,
2756
+ "step": 1800
2757
+ },
2758
+ {
2759
+ "epoch": 2.9006410256410255,
2760
+ "grad_norm": 2.0378946111088347,
2761
+ "learning_rate": 1.840855106888361e-08,
2762
+ "logits/chosen": -0.298828125,
2763
+ "logits/rejected": -0.4765625,
2764
+ "logps/chosen": -346.0,
2765
+ "logps/rejected": -370.0,
2766
+ "loss": 0.0427,
2767
+ "rewards/accuracies": 0.9750000238418579,
2768
+ "rewards/chosen": 1.828125,
2769
+ "rewards/margins": 7.90625,
2770
+ "rewards/rejected": -6.0625,
2771
+ "step": 1810
2772
+ },
2773
+ {
2774
+ "epoch": 2.9166666666666665,
2775
+ "grad_norm": 2.2386092025677784,
2776
+ "learning_rate": 1.5439429928741092e-08,
2777
+ "logits/chosen": -0.21875,
2778
+ "logits/rejected": -0.2138671875,
2779
+ "logps/chosen": -380.0,
2780
+ "logps/rejected": -372.0,
2781
+ "loss": 0.0179,
2782
+ "rewards/accuracies": 0.987500011920929,
2783
+ "rewards/chosen": 1.84375,
2784
+ "rewards/margins": 8.5,
2785
+ "rewards/rejected": -6.65625,
2786
+ "step": 1820
2787
+ },
2788
+ {
2789
+ "epoch": 2.9326923076923075,
2790
+ "grad_norm": 5.606669828245322,
2791
+ "learning_rate": 1.2470308788598574e-08,
2792
+ "logits/chosen": -0.26953125,
2793
+ "logits/rejected": -0.224609375,
2794
+ "logps/chosen": -364.0,
2795
+ "logps/rejected": -346.0,
2796
+ "loss": 0.0302,
2797
+ "rewards/accuracies": 1.0,
2798
+ "rewards/chosen": 2.5625,
2799
+ "rewards/margins": 8.5625,
2800
+ "rewards/rejected": -5.96875,
2801
+ "step": 1830
2802
+ },
2803
+ {
2804
+ "epoch": 2.948717948717949,
2805
+ "grad_norm": 7.459897331240726,
2806
+ "learning_rate": 9.501187648456057e-09,
2807
+ "logits/chosen": -0.166015625,
2808
+ "logits/rejected": -0.27734375,
2809
+ "logps/chosen": -338.0,
2810
+ "logps/rejected": -372.0,
2811
+ "loss": 0.0069,
2812
+ "rewards/accuracies": 1.0,
2813
+ "rewards/chosen": 1.9453125,
2814
+ "rewards/margins": 8.375,
2815
+ "rewards/rejected": -6.40625,
2816
+ "step": 1840
2817
+ },
2818
+ {
2819
+ "epoch": 2.96474358974359,
2820
+ "grad_norm": 46.309709716339626,
2821
+ "learning_rate": 6.532066508313539e-09,
2822
+ "logits/chosen": -0.265625,
2823
+ "logits/rejected": -0.3046875,
2824
+ "logps/chosen": -368.0,
2825
+ "logps/rejected": -392.0,
2826
+ "loss": 0.0144,
2827
+ "rewards/accuracies": 1.0,
2828
+ "rewards/chosen": 2.125,
2829
+ "rewards/margins": 8.5,
2830
+ "rewards/rejected": -6.375,
2831
+ "step": 1850
2832
+ },
2833
+ {
2834
+ "epoch": 2.980769230769231,
2835
+ "grad_norm": 2.757504775523463,
2836
+ "learning_rate": 3.562945368171021e-09,
2837
+ "logits/chosen": -0.2314453125,
2838
+ "logits/rejected": -0.3515625,
2839
+ "logps/chosen": -366.0,
2840
+ "logps/rejected": -382.0,
2841
+ "loss": 0.0081,
2842
+ "rewards/accuracies": 1.0,
2843
+ "rewards/chosen": 2.3125,
2844
+ "rewards/margins": 8.125,
2845
+ "rewards/rejected": -5.84375,
2846
+ "step": 1860
2847
+ },
2848
+ {
2849
+ "epoch": 2.996794871794872,
2850
+ "grad_norm": 0.7690916064350508,
2851
+ "learning_rate": 5.938242280285036e-10,
2852
+ "logits/chosen": -0.146484375,
2853
+ "logits/rejected": -0.361328125,
2854
+ "logps/chosen": -352.0,
2855
+ "logps/rejected": -378.0,
2856
+ "loss": 0.0214,
2857
+ "rewards/accuracies": 1.0,
2858
+ "rewards/chosen": 2.9375,
2859
+ "rewards/margins": 8.75,
2860
+ "rewards/rejected": -5.84375,
2861
+ "step": 1870
2862
+ },
2863
+ {
2864
+ "epoch": 3.0,
2865
+ "eval_logits/chosen": -0.2392578125,
2866
+ "eval_logits/rejected": -0.2734375,
2867
+ "eval_logps/chosen": -364.0,
2868
+ "eval_logps/rejected": -370.0,
2869
+ "eval_loss": 0.21664032340049744,
2870
+ "eval_rewards/accuracies": 0.9134615659713745,
2871
+ "eval_rewards/chosen": 1.7734375,
2872
+ "eval_rewards/margins": 6.65625,
2873
+ "eval_rewards/rejected": -4.875,
2874
+ "eval_runtime": 26.3085,
2875
+ "eval_samples_per_second": 7.602,
2876
+ "eval_steps_per_second": 0.494,
2877
+ "step": 1872
2878
  }
2879
  ],
2880
  "logging_steps": 10,
 
@@ -1928,7 +2889,7 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
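The appended log_history entries above (steps 1250 up to the epoch-3 evaluation at step 1872) record DPO-style preference metrics (rewards/chosen, rewards/rejected, rewards/margins), and should_training_stop flips to true, so this looks like the final checkpoint of a 3-epoch run. A minimal sketch for reading the committed state back (assuming a local clone; the file path is the one shown in this diff):

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])  # 3.0 1872 per this commit

# Last evaluation record among the appended log_history entries.
final_eval = [e for e in state["log_history"] if "eval_loss" in e][-1]
print(final_eval["eval_loss"], final_eval["eval_rewards/accuracies"])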