Commit
·
2581e08
1
Parent(s):
eaa1e52
drgrpo checkpoints
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/added_tokens.json +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/config.json +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/generation_config.json +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/merges.txt +0 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00002-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00003-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00004-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00005-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00006-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model.safetensors.index.json +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/special_tokens_map.json +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/tokenizer.json +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/tokenizer_config.json +3 -0
- qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/vocab.json +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/added_tokens.json +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/config.json +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/generation_config.json +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/merges.txt +0 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00001-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00002-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00003-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00004-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00005-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00006-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model.safetensors.index.json +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/special_tokens_map.json +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/tokenizer.json +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/tokenizer_config.json +3 -0
- qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/vocab.json +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/added_tokens.json +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/config.json +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/generation_config.json +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/merges.txt +0 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00002-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00003-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00004-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00005-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00006-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model.safetensors.index.json +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/special_tokens_map.json +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/tokenizer.json +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/tokenizer_config.json +3 -0
- qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/vocab.json +3 -0
- qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/added_tokens.json +3 -0
- qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/config.json +3 -0
- qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/generation_config.json +3 -0
- qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/merges.txt +0 -0
- qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors +3 -0
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
|
3 |
+
size 80
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
|
3 |
+
size 1008
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/generation_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
|
3 |
+
size 139
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6cf00fe40c951c70dbba96e30b603231aa4b860315a4fe7cb522c08962ce90e
|
3 |
+
size 4996577736
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00002-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:621369869da69cdef92c80c8a92728aa910648375af6fa746ba45654872a72d5
|
3 |
+
size 4996347752
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00003-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a45e33783fa9d7dcda77d9f235cbc59bc6c09a73dba9af8407dd0426769ed0c
|
3 |
+
size 4997127120
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00004-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1f5b3f761a8bd21ce3e8394aebcd4f26b5e6649f780d28abe0d3c7cb276e00c
|
3 |
+
size 4985592520
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00005-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:640e7208b56c408c2e4d2a4c714b2a8bb557bfbca994d3371801feaff323c08f
|
3 |
+
size 4996348976
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00006-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e089c7ff3af95e4d70282b220159cdd50c92d0647f30482a740fe24784eeb0fe
|
3 |
+
size 3660151400
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model.safetensors.index.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
|
3 |
+
size 416452
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
|
3 |
+
size 370
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
|
3 |
+
size 11418365
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
|
3 |
+
size 1331
|
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
|
3 |
+
size 2776833
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
|
3 |
+
size 80
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
|
3 |
+
size 1008
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/generation_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
|
3 |
+
size 139
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00001-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4785618a3e3c0a5116add20387b923616be63530b212e1458af5a662ac2a8d5
|
3 |
+
size 4996577736
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00002-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ae9a49d94b34c7a37e65a293054fbff92bd806b0a6def207475c6905f03e2eb
|
3 |
+
size 4996347752
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00003-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa19a71355afadf83082058520cf2abda37f1d0396cca0c817fcb0dc40523641
|
3 |
+
size 4997127120
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00004-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:441aac4666355f28be24279646b2bf827b8195aac008d1351720e2f7f2fc3d95
|
3 |
+
size 4985592520
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00005-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:974946ca8de0ffd6e4e16a06fa331c0ad2b160f70492fc19bf28166644cac413
|
3 |
+
size 4996348976
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00006-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd2773e22bfd441db8f80f00cd1f3ea2c9865d30425a023ef245b3dd339927d2
|
3 |
+
size 3660151400
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model.safetensors.index.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
|
3 |
+
size 416452
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
|
3 |
+
size 370
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
|
3 |
+
size 11418365
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
|
3 |
+
size 1331
|
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
|
3 |
+
size 2776833
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
|
3 |
+
size 80
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
|
3 |
+
size 1008
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/generation_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
|
3 |
+
size 139
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c29fb7a17f2e3f8a1d312a27896dc220da13d844f1ef271da14f03017608e6ba
|
3 |
+
size 4996577736
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00002-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3fc32704ab3b4125f8319c7928a253efb8cfcbd50419881009b1322e112c669
|
3 |
+
size 4996347752
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00003-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87af473fc7b12f0b9cc65c1ade1b3f58aee3b0eabcc1f1b8a65e302daca053d1
|
3 |
+
size 4997127120
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00004-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3444360f8315ba1f1883d7b331093301efb9ec9182c1ac06ea741434aae472c
|
3 |
+
size 4985592520
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00005-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ee65329334e31f74b559fbe8942065f4be3c4650bdeca097c11c6c0ca5e43b4
|
3 |
+
size 4996348976
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00006-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db0dd3986f8bebda6cc42668bb174139c06c0ea20192f7564a6a4bcf9ece3f16
|
3 |
+
size 3660151400
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model.safetensors.index.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
|
3 |
+
size 416452
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
|
3 |
+
size 370
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
|
3 |
+
size 11418365
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
|
3 |
+
size 1331
|
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
|
3 |
+
size 2776833
|
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
|
3 |
+
size 80
|
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
|
3 |
+
size 1008
|
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/generation_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
|
3 |
+
size 139
|
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:582f8f4546a336d66d626898ea06eff4d217e4e4e171c46ced44167972ae88dc
|
3 |
+
size 4996577736
|