shengyi-qian commited on
Commit
2581e08
·
1 Parent(s): eaa1e52

drgrpo checkpoints

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/added_tokens.json +3 -0
  2. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/config.json +3 -0
  3. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/generation_config.json +3 -0
  4. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/merges.txt +0 -0
  5. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors +3 -0
  6. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00002-of-00006.safetensors +3 -0
  7. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00003-of-00006.safetensors +3 -0
  8. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00004-of-00006.safetensors +3 -0
  9. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00005-of-00006.safetensors +3 -0
  10. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00006-of-00006.safetensors +3 -0
  11. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model.safetensors.index.json +3 -0
  12. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/special_tokens_map.json +3 -0
  13. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/tokenizer.json +3 -0
  14. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/tokenizer_config.json +3 -0
  15. qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/vocab.json +3 -0
  16. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/added_tokens.json +3 -0
  17. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/config.json +3 -0
  18. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/generation_config.json +3 -0
  19. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/merges.txt +0 -0
  20. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00001-of-00006.safetensors +3 -0
  21. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00002-of-00006.safetensors +3 -0
  22. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00003-of-00006.safetensors +3 -0
  23. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00004-of-00006.safetensors +3 -0
  24. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00005-of-00006.safetensors +3 -0
  25. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00006-of-00006.safetensors +3 -0
  26. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model.safetensors.index.json +3 -0
  27. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/special_tokens_map.json +3 -0
  28. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/tokenizer.json +3 -0
  29. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/tokenizer_config.json +3 -0
  30. qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/vocab.json +3 -0
  31. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/added_tokens.json +3 -0
  32. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/config.json +3 -0
  33. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/generation_config.json +3 -0
  34. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/merges.txt +0 -0
  35. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors +3 -0
  36. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00002-of-00006.safetensors +3 -0
  37. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00003-of-00006.safetensors +3 -0
  38. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00004-of-00006.safetensors +3 -0
  39. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00005-of-00006.safetensors +3 -0
  40. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00006-of-00006.safetensors +3 -0
  41. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model.safetensors.index.json +3 -0
  42. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/special_tokens_map.json +3 -0
  43. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/tokenizer.json +3 -0
  44. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/tokenizer_config.json +3 -0
  45. qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/vocab.json +3 -0
  46. qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/added_tokens.json +3 -0
  47. qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/config.json +3 -0
  48. qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/generation_config.json +3 -0
  49. qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/merges.txt +0 -0
  50. qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors +3 -0
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
3
+ size 80
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
3
+ size 1008
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
3
+ size 139
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6cf00fe40c951c70dbba96e30b603231aa4b860315a4fe7cb522c08962ce90e
3
+ size 4996577736
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:621369869da69cdef92c80c8a92728aa910648375af6fa746ba45654872a72d5
3
+ size 4996347752
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a45e33783fa9d7dcda77d9f235cbc59bc6c09a73dba9af8407dd0426769ed0c
3
+ size 4997127120
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f5b3f761a8bd21ce3e8394aebcd4f26b5e6649f780d28abe0d3c7cb276e00c
3
+ size 4985592520
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:640e7208b56c408c2e4d2a4c714b2a8bb557bfbca994d3371801feaff323c08f
3
+ size 4996348976
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e089c7ff3af95e4d70282b220159cdd50c92d0647f30482a740fe24784eeb0fe
3
+ size 3660151400
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
3
+ size 416452
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
3
+ size 370
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
3
+ size 11418365
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
3
+ size 1331
qwen1.5_base_rule_base_arc_heavy_drgrpo_reward_func/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
3
+ size 2776833
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
3
+ size 80
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
3
+ size 1008
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
3
+ size 139
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4785618a3e3c0a5116add20387b923616be63530b212e1458af5a662ac2a8d5
3
+ size 4996577736
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae9a49d94b34c7a37e65a293054fbff92bd806b0a6def207475c6905f03e2eb
3
+ size 4996347752
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa19a71355afadf83082058520cf2abda37f1d0396cca0c817fcb0dc40523641
3
+ size 4997127120
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441aac4666355f28be24279646b2bf827b8195aac008d1351720e2f7f2fc3d95
3
+ size 4985592520
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974946ca8de0ffd6e4e16a06fa331c0ad2b160f70492fc19bf28166644cac413
3
+ size 4996348976
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd2773e22bfd441db8f80f00cd1f3ea2c9865d30425a023ef245b3dd339927d2
3
+ size 3660151400
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
3
+ size 416452
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
3
+ size 370
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
3
+ size 11418365
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
3
+ size 1331
qwen1.5_base_rule_base_equal_dist_drgrpo_reward_func/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
3
+ size 2776833
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
3
+ size 80
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
3
+ size 1008
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
3
+ size 139
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c29fb7a17f2e3f8a1d312a27896dc220da13d844f1ef271da14f03017608e6ba
3
+ size 4996577736
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3fc32704ab3b4125f8319c7928a253efb8cfcbd50419881009b1322e112c669
3
+ size 4996347752
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87af473fc7b12f0b9cc65c1ade1b3f58aee3b0eabcc1f1b8a65e302daca053d1
3
+ size 4997127120
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3444360f8315ba1f1883d7b331093301efb9ec9182c1ac06ea741434aae472c
3
+ size 4985592520
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ee65329334e31f74b559fbe8942065f4be3c4650bdeca097c11c6c0ca5e43b4
3
+ size 4996348976
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db0dd3986f8bebda6cc42668bb174139c06c0ea20192f7564a6a4bcf9ece3f16
3
+ size 3660151400
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
3
+ size 416452
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
3
+ size 370
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
3
+ size 11418365
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
3
+ size 1331
qwen1.5_base_rule_base_imdb_heavy_drgrpo_reward_func/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
3
+ size 2776833
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
3
+ size 80
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
3
+ size 1008
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
3
+ size 139
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen1.5_base_rule_base_math_heavy_drgrpo_reward_func/model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:582f8f4546a336d66d626898ea06eff4d217e4e4e171c46ced44167972ae88dc
3
+ size 4996577736