danielhanchen committed (verified)
Commit 95d26c7 · Parent: 6645256

Add files using upload-large-folder tool

.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Qwen3-16B-A3B-UD-IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen3-16B-A3B-UD-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen3-16B-A3B-UD-IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen3-16B-A3B-UD-Q2_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen3-16B-A3B-UD-Q4_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
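Each entry above routes matching files through Git LFS, so the repository stores only small pointer files while the multi-gigabyte .gguf blobs live in LFS storage. As a rough illustration of how these patterns decide what gets tracked (Python's fnmatch only approximates git's wildmatch semantics):

```python
from fnmatch import fnmatch

# Patterns taken from the .gitattributes hunk above; fnmatch is an
# approximation of git's wildmatch rules, adequate for these globs.
LFS_PATTERNS = [
    "*.zip",
    "*.zst",
    "*tfevents*",
    "Qwen3-16B-A3B-UD-IQ1_S.gguf",
    "Qwen3-16B-A3B-UD-Q4_K_XL.gguf",
]

def tracked_by_lfs(filename: str) -> bool:
    """True if any .gitattributes pattern marks the file as LFS-tracked."""
    return any(fnmatch(filename, pat) for pat in LFS_PATTERNS)

print(tracked_by_lfs("Qwen3-16B-A3B-UD-Q4_K_XL.gguf"))  # True
print(tracked_by_lfs("README.md"))                      # False
```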
Qwen3-16B-A3B-UD-IQ1_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25c25a77953a78ae48c82065abe87bd6a08d7791901cc969d3b9235e182f8d14
+ size 5251044032
Qwen3-16B-A3B-UD-IQ1_S.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ca56d774c49c9a6ad1861ffbd414858646590bcfa08dcd2fd8ec913f7db4c24
+ size 4873638592
Qwen3-16B-A3B-UD-IQ2_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:46744b84fc0498a1354827d848297f08e7ef2908b7e239a4b0adbea01fc1ae59
+ size 5854958272
Qwen3-16B-A3B-UD-Q2_K_XL.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7f7a7db4bc621f5037ff75880c8cec2b96b202d403dc34ebfaa535b0bb045cd
+ size 6342546112
Qwen3-16B-A3B-UD-Q4_K_XL.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a6902a57785e5594a6b97162c6642d3d201743fb97d834423f4aae36b29e4891
+ size 9310278336
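Each .gguf entry above is a three-line Git LFS pointer rather than the model itself: the pointer-spec version, the SHA-256 of the real blob, and its size in bytes. A minimal sketch of checking a downloaded file against its pointer (the pointer text is copied from the IQ1_M hunk above; the local filename is an assumption):

```python
import hashlib

def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer file into its oid and size fields."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify_download(path: str, pointer: dict, chunk: int = 1 << 20) -> bool:
    """Check a downloaded blob against the pointer's size and SHA-256."""
    h = hashlib.sha256()
    n = 0
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
            n += len(block)
    return n == pointer["size"] and h.hexdigest() == pointer["oid"]

# Pointer content copied from the IQ1_M diff hunk above.
pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:25c25a77953a78ae48c82065abe87bd6a08d7791901cc969d3b9235e182f8d14\n"
    "size 5251044032\n"
)
print(verify_download("Qwen3-16B-A3B-UD-IQ1_M.gguf", pointer))
```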
README.md CHANGED
@@ -1,16 +1,12 @@
  ---
  tags:
  - unsloth
- - qwen3
- - qwen
  license: apache-2.0
  base_model:
  - kalomaze/Qwen3-16B-A3B
  ---
  # Qwen3-16B-A3B

- Qwen3-16B-A3B is a rendition of Qwen3-30B-A3B by [kalomaze](https://huggingface.co/kalomaze/Qwen3-16B-A3B).
-
  A man-made horror beyond your comprehension.

  But no, seriously, this is my experiment to:
@@ -20,4 +16,4 @@ But no, seriously, this is my experiment to:
  It can still write semi-coherently without any additional training or distillation done on top of it from the original 30b MoE.
  The .txt files with the original measurements are provided in the repo along with the exported weights.

- Custom testing to measure the experts was done on a hacked version of vllm, and then I made a bespoke script to selectively export the weights according to the measurements.
+ Custom testing to measure the experts was done on a hacked version of vllm, and then I made a bespoke script to selectively export the weights according to the measurements.
 
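On that last README line: the selective export amounts to ranking each layer's experts by measured activation and keeping the top slice. Purely as a hypothetical sketch of the idea (the 128-to-64 split assumes the 30B parent's config; the tensor layout, names, and measurement format here are illustrative inventions, not the author's actual script):

```python
import torch

NUM_EXPERTS_ORIG = 128   # assumed expert count of the Qwen3-30B-A3B parent
NUM_EXPERTS_KEPT = 64    # expert count in the exported 16B config below

def prune_layer(experts: list[dict], router_weight: torch.Tensor,
                usage_counts: torch.Tensor):
    """Keep the most-activated experts and slice the router to match.

    experts:       per-expert weight dicts for one MoE layer
    router_weight: [num_experts, hidden_size] gate projection
    usage_counts:  measured activations per expert (hypothetical output
                   of the hacked-vllm instrumentation)
    """
    # Indices of the top-64 experts, kept in ascending order so the
    # exported layout stays stable.
    keep = torch.topk(usage_counts, NUM_EXPERTS_KEPT).indices.sort().values
    kept_experts = [experts[i] for i in keep.tolist()]
    # Router rows must be re-indexed so logit i points at kept expert i.
    kept_router = router_weight[keep]
    return kept_experts, kept_router
```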
config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "architectures": [
+     "Qwen3MoeForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "decoder_sparse_step": 1,
+   "eos_token_id": 151645,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 2048,
+   "initializer_range": 0.02,
+   "intermediate_size": 6144,
+   "max_position_embeddings": 40960,
+   "max_window_layers": 48,
+   "mlp_only_layers": [],
+   "model_type": "qwen3_moe",
+   "moe_intermediate_size": 768,
+   "norm_topk_prob": true,
+   "num_attention_heads": 32,
+   "num_experts": 64,
+   "num_experts_per_tok": 8,
+   "num_hidden_layers": 48,
+   "num_key_value_heads": 4,
+   "output_router_logits": false,
+   "pad_token_id": 151654,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "router_aux_loss_coef": 0.001,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.52.0.dev0",
+   "unsloth_fixed": true,
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 151936
+ }
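These values account for the model's name: 48 layers of 64 experts each give roughly 16B total parameters, while routing only 8 experts per token leaves about 3B active (the "A3B"). A back-of-envelope check from the config above, counting only the dominant weight matrices (norms and the router are omitted, so totals are approximate):

```python
# Values copied from the config.json above.
hidden = 2048
moe_inter = 768
n_experts = 64
n_active = 8
n_layers = 48
n_heads, n_kv, head_dim = 32, 4, 128
vocab = 151936

expert = 3 * hidden * moe_inter                       # gate/up/down projections
attn = hidden * head_dim * (2 * n_heads + 2 * n_kv)   # q, k, v, o projections
embed = 2 * vocab * hidden                            # untied embeddings + lm_head

total = n_layers * (n_experts * expert + attn) + embed
active = n_layers * (n_active * expert + attn) + embed

print(f"total  ~ {total / 1e9:.1f}B")   # ~16.0B -> "16B"
print(f"active ~ {active / 1e9:.1f}B")  # ~ 3.3B -> "A3B"
```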