danielhanchen committed on
Commit
719e9af
·
verified ·
1 Parent(s): 9784122

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -40,3 +40,6 @@ DeepSeek-R1-Distill-Qwen-1.5B-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
40
  DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
41
  DeepSeek-R1-Distill-Qwen-1.5B-Q2_K_L.gguf filter=lfs diff=lfs merge=lfs -text
42
  DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
40
  DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
41
  DeepSeek-R1-Distill-Qwen-1.5B-Q2_K_L.gguf filter=lfs diff=lfs merge=lfs -text
42
  DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
43
+ DeepSeek-R1-Distill-Qwen-1.5B-UD-IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text
44
+ DeepSeek-R1-Distill-Qwen-1.5B-UD-IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text
45
+ DeepSeek-R1-Distill-Qwen-1.5B-UD-Q4_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
DeepSeek-R1-Distill-Qwen-1.5B-UD-IQ1_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdc7eceff5045eb856f98bcc826233a00f27388e826a7a706e2eac606f1e72c2
3
+ size 700140640
DeepSeek-R1-Distill-Qwen-1.5B-UD-IQ2_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0494af1dc5910fdda79884e97e7c3b0d6f3ff7b02109455b0c2d4197aec8290
3
+ size 792150112
DeepSeek-R1-Distill-Qwen-1.5B-UD-Q4_K_XL.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88e62a805cf0e89cddd2c9fe1aa93a4cef69cc4a3b2ff1bb44dc3c17240f5d57
3
+ size 1189281376
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151643,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 1536,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 8960,
12
+ "max_position_embeddings": 131072,
13
+ "max_window_layers": 21,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 2,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_theta": 10000,
20
+ "sliding_window": 4096,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.44.0",
24
+ "use_cache": true,
25
+ "use_mrope": false,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 151936
28
+ }