danielhanchen committed on
Commit
8116c8c
·
verified ·
1 Parent(s): 9e9c90d

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00009-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
37
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00001-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
38
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00015-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
39
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00008-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
40
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00011-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
41
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00006-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
42
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00014-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
43
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00007-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
44
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00010-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
45
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00013-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
46
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00012-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
47
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00005-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
48
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00004-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
49
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00003-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
50
+ Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00002-of-00015.gguf filter=lfs diff=lfs merge=lfs -text
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00001-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb8c14ba4a003755a444e63dac35f54448625fd6146b73e9536b2916d262bb86
3
+ size 47784556352
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00002-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bb00ba695025160463c94f4617239c6a67705e90489e61651428525b9a8c85a
3
+ size 48927728256
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00003-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30058f639815c3b19887ee8610755a4394109582a1a296ff81670a6e3cb1933c
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00004-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce96d94f1dd1333b5cd04f3d38f3b4dbe75e6136878d50899a0882d71c8521c9
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00005-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db015244bde73412e3ad6572900d57b0db0f31b36fe67c994f813d286505bce6
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00006-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b72b38f5d330dec5a22097cd5f6b75ad6e54a35a96795f5a9e767cb2efe4a4d
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00007-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00275dcd796b8e3d6109fbd38e85577e6ec99f24af5a149400f5ae16239a669
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00008-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:355e3c77a1555698c02839d76c5747bba5d764ba269d4bf190631767a356fa95
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00009-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d832e3fb7a23d8a1a1209006a692c2e8cf05831dc3d56efef9ecf299ca8b1cef
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00010-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3befa8157e8cf39940a31628535b81f8552fb9ae979dd4fd58247c387c9641f2
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00011-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f0b6165ce0a70bb4f80a42ef5c42b5ae3607be2d54404a1a3bd33126e525e35
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00012-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:680de0356e4404c4b2a272cc5f73a40edbfe7c706ef27d1b05c3c231a1908b9f
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00013-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb101d44c5b407328800af1fa0f2b09a9d3e38aa921999100101d9c531c1a70
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00014-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be7acaab4dd4ed044bb36d8743029af2e35d7bccb73d6b2f7589d892ca85decd
3
+ size 48927728320
Q8_0/DeepSeek-Prover-V2-671B-Q8_0-00015-of-00015.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcce5a4376eb65c6111b120fa14031a72aea45a30f46dd850c643e7a44dbecbc
3
+ size 29441495680
config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "bos_token_id": 0,
13
+ "eos_token_id": 1,
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 7168,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 18432,
20
+ "kv_lora_rank": 512,
21
+ "max_position_embeddings": 163840,
22
+ "model_type": "deepseek_v3",
23
+ "moe_intermediate_size": 2048,
24
+ "moe_layer_freq": 1,
25
+ "n_group": 8,
26
+ "n_routed_experts": 256,
27
+ "n_shared_experts": 1,
28
+ "norm_topk_prob": true,
29
+ "num_attention_heads": 128,
30
+ "num_experts_per_tok": 8,
31
+ "num_hidden_layers": 61,
32
+ "num_key_value_heads": 128,
33
+ "num_nextn_predict_layers": 1,
34
+ "q_lora_rank": 1536,
35
+ "qk_nope_head_dim": 128,
36
+ "qk_rope_head_dim": 64,
37
+ "quantization_config": {
38
+ "activation_scheme": "dynamic",
39
+ "fmt": "e4m3",
40
+ "quant_method": "fp8",
41
+ "weight_block_size": [
42
+ 128,
43
+ 128
44
+ ]
45
+ },
46
+ "rms_norm_eps": 1e-06,
47
+ "rope_scaling": {
48
+ "beta_fast": 32,
49
+ "beta_slow": 1,
50
+ "factor": 40,
51
+ "mscale": 1.0,
52
+ "mscale_all_dim": 1.0,
53
+ "original_max_position_embeddings": 4096,
54
+ "type": "yarn"
55
+ },
56
+ "rope_theta": 10000,
57
+ "routed_scaling_factor": 2.5,
58
+ "scoring_func": "sigmoid",
59
+ "tie_word_embeddings": false,
60
+ "topk_group": 4,
61
+ "topk_method": "noaux_tc",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.46.3",
64
+ "use_cache": true,
65
+ "v_head_dim": 128,
66
+ "vocab_size": 129280
67
+ }