{ "quant_method": "exl3", "version": "0.0.1", "bits": 3.25, "calibration": { "rows": 100, "cols": 2048 }, "tensor_storage": { "model.embed_tokens": { "stored_tensors": { "model.embed_tokens.weight": { "shape": [ 128256, 16384 ], "n_bytes": 4202692608, "dtype": "torch.float16" } } }, "model.layers.0.input_layernorm": { "stored_tensors": { "model.layers.0.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.0.self_attn.q_proj": { "stored_tensors": { "model.layers.0.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.0.self_attn.k_proj": { "stored_tensors": { "model.layers.0.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.0.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.0.self_attn.v_proj": { "stored_tensors": { "model.layers.0.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.0.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.0.self_attn.o_proj": { "stored_tensors": { "model.layers.0.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.0.post_attention_layernorm": { "stored_tensors": { "model.layers.0.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.0.mlp.up_proj": { "stored_tensors": { "model.layers.0.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.mlp.up_proj.svh": { "shape": [ 5376 ], "n_bytes": 10752, "dtype": "torch.float16" }, "model.layers.0.mlp.up_proj.trellis": { "shape": [ 1024, 336, 64 ], "n_bytes": 44040192, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.0.mlp.gate_proj": { "stored_tensors": { "model.layers.0.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.mlp.gate_proj.svh": { "shape": [ 5376 ], "n_bytes": 10752, "dtype": "torch.float16" }, "model.layers.0.mlp.gate_proj.trellis": { "shape": [ 1024, 336, 48 ], "n_bytes": 33030144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.0.mlp.down_proj": { "stored_tensors": { "model.layers.0.mlp.down_proj.suh": { "shape": [ 5376 ], "n_bytes": 10752, "dtype": "torch.float16" }, "model.layers.0.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.0.mlp.down_proj.trellis": { "shape": [ 336, 1024, 64 ], "n_bytes": 44040192, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.1.input_layernorm": { "stored_tensors": { "model.layers.1.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.1.self_attn.q_proj": { "stored_tensors": { "model.layers.1.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.1.self_attn.k_proj": { "stored_tensors": { "model.layers.1.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.1.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.1.self_attn.v_proj": { "stored_tensors": { "model.layers.1.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.1.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.1.self_attn.o_proj": { "stored_tensors": { "model.layers.1.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.1.post_attention_layernorm": { "stored_tensors": { "model.layers.1.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.1.mlp.up_proj": { "stored_tensors": { "model.layers.1.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.mlp.up_proj.svh": { "shape": [ 10752 ], "n_bytes": 21504, "dtype": "torch.float16" }, "model.layers.1.mlp.up_proj.trellis": { "shape": [ 1024, 672, 48 ], "n_bytes": 66060288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.1.mlp.gate_proj": { "stored_tensors": { "model.layers.1.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.mlp.gate_proj.svh": { "shape": [ 10752 ], "n_bytes": 21504, "dtype": "torch.float16" }, "model.layers.1.mlp.gate_proj.trellis": { "shape": [ 1024, 672, 48 ], "n_bytes": 66060288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.1.mlp.down_proj": { "stored_tensors": { "model.layers.1.mlp.down_proj.suh": { "shape": [ 10752 ], "n_bytes": 21504, "dtype": "torch.float16" }, "model.layers.1.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.1.mlp.down_proj.trellis": { "shape": [ 672, 1024, 64 ], "n_bytes": 88080384, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.2.input_layernorm": { "stored_tensors": { "model.layers.2.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.2.self_attn.q_proj": { "stored_tensors": { "model.layers.2.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.2.self_attn.k_proj": { "stored_tensors": { "model.layers.2.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.2.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.2.self_attn.v_proj": { "stored_tensors": { "model.layers.2.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.2.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.2.self_attn.o_proj": { "stored_tensors": { "model.layers.2.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.2.post_attention_layernorm": { "stored_tensors": { "model.layers.2.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.2.mlp.up_proj": { "stored_tensors": { "model.layers.2.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.mlp.up_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.2.mlp.up_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.2.mlp.gate_proj": { "stored_tensors": { "model.layers.2.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.mlp.gate_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.2.mlp.gate_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.2.mlp.down_proj": { "stored_tensors": { "model.layers.2.mlp.down_proj.suh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.2.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.2.mlp.down_proj.trellis": { "shape": [ 1008, 1024, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.3.input_layernorm": { "stored_tensors": { "model.layers.3.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.3.self_attn.q_proj": { "stored_tensors": { "model.layers.3.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.3.self_attn.k_proj": { "stored_tensors": { "model.layers.3.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.3.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.3.self_attn.v_proj": { "stored_tensors": { "model.layers.3.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.3.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.3.self_attn.o_proj": { "stored_tensors": { "model.layers.3.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.3.post_attention_layernorm": { "stored_tensors": { "model.layers.3.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.3.mlp.up_proj": { "stored_tensors": { "model.layers.3.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.mlp.up_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.3.mlp.up_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.3.mlp.gate_proj": { "stored_tensors": { "model.layers.3.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.mlp.gate_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.3.mlp.gate_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.3.mlp.down_proj": { "stored_tensors": { "model.layers.3.mlp.down_proj.suh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.3.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.3.mlp.down_proj.trellis": { "shape": [ 1008, 1024, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.4.input_layernorm": { "stored_tensors": { "model.layers.4.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.4.self_attn.q_proj": { "stored_tensors": { "model.layers.4.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.4.self_attn.k_proj": { "stored_tensors": { "model.layers.4.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.4.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.4.self_attn.v_proj": { "stored_tensors": { "model.layers.4.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.4.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.4.self_attn.o_proj": { "stored_tensors": { "model.layers.4.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.4.post_attention_layernorm": { "stored_tensors": { "model.layers.4.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.4.mlp.up_proj": { "stored_tensors": { "model.layers.4.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.mlp.up_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.4.mlp.up_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.4.mlp.gate_proj": { "stored_tensors": { "model.layers.4.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.mlp.gate_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.4.mlp.gate_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.4.mlp.down_proj": { "stored_tensors": { "model.layers.4.mlp.down_proj.suh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.4.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.4.mlp.down_proj.trellis": { "shape": [ 1008, 1024, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.5.input_layernorm": { "stored_tensors": { "model.layers.5.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.5.self_attn.q_proj": { "stored_tensors": { "model.layers.5.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.5.self_attn.k_proj": { "stored_tensors": { "model.layers.5.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.5.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.5.self_attn.v_proj": { "stored_tensors": { "model.layers.5.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.5.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.5.self_attn.o_proj": { "stored_tensors": { "model.layers.5.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.5.post_attention_layernorm": { "stored_tensors": { "model.layers.5.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.5.mlp.up_proj": { "stored_tensors": { "model.layers.5.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.mlp.up_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.5.mlp.up_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.5.mlp.gate_proj": { "stored_tensors": { "model.layers.5.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.mlp.gate_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.5.mlp.gate_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.5.mlp.down_proj": { "stored_tensors": { "model.layers.5.mlp.down_proj.suh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.5.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.5.mlp.down_proj.trellis": { "shape": [ 1008, 1024, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.6.input_layernorm": { "stored_tensors": { "model.layers.6.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.6.self_attn.q_proj": { "stored_tensors": { "model.layers.6.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.6.self_attn.k_proj": { "stored_tensors": { "model.layers.6.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.6.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.6.self_attn.v_proj": { "stored_tensors": { "model.layers.6.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.6.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.6.self_attn.o_proj": { "stored_tensors": { "model.layers.6.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.6.post_attention_layernorm": { "stored_tensors": { "model.layers.6.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.6.mlp.up_proj": { "stored_tensors": { "model.layers.6.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.mlp.up_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.6.mlp.up_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.6.mlp.gate_proj": { "stored_tensors": { "model.layers.6.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.mlp.gate_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.6.mlp.gate_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.6.mlp.down_proj": { "stored_tensors": { "model.layers.6.mlp.down_proj.suh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.6.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.6.mlp.down_proj.trellis": { "shape": [ 1008, 1024, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.7.input_layernorm": { "stored_tensors": { "model.layers.7.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.7.self_attn.q_proj": { "stored_tensors": { "model.layers.7.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.7.self_attn.k_proj": { "stored_tensors": { "model.layers.7.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.7.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.7.self_attn.v_proj": { "stored_tensors": { "model.layers.7.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.7.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.7.self_attn.o_proj": { "stored_tensors": { "model.layers.7.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.7.post_attention_layernorm": { "stored_tensors": { "model.layers.7.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.7.mlp.up_proj": { "stored_tensors": { "model.layers.7.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.mlp.up_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.7.mlp.up_proj.trellis": { "shape": [ 1024, 1008, 64 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.7.mlp.gate_proj": { "stored_tensors": { "model.layers.7.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.mlp.gate_proj.svh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.7.mlp.gate_proj.trellis": { "shape": [ 1024, 1008, 48 ], "n_bytes": 99090432, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.7.mlp.down_proj": { "stored_tensors": { "model.layers.7.mlp.down_proj.suh": { "shape": [ 16128 ], "n_bytes": 32256, "dtype": "torch.float16" }, "model.layers.7.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.7.mlp.down_proj.trellis": { "shape": [ 1008, 1024, 64 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.8.input_layernorm": { "stored_tensors": { "model.layers.8.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.8.self_attn.q_proj": { "stored_tensors": { "model.layers.8.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.8.self_attn.k_proj": { "stored_tensors": { "model.layers.8.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.8.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.8.self_attn.v_proj": { "stored_tensors": { "model.layers.8.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.8.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.8.self_attn.o_proj": { "stored_tensors": { "model.layers.8.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.8.post_attention_layernorm": { "stored_tensors": { "model.layers.8.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.8.mlp.up_proj": { "stored_tensors": { "model.layers.8.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.8.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.8.mlp.gate_proj": { "stored_tensors": { "model.layers.8.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.8.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.8.mlp.down_proj": { "stored_tensors": { "model.layers.8.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.8.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.8.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.9": { "stored_tensors": {} }, "model.layers.10": { "stored_tensors": {} }, "model.layers.11": { "stored_tensors": {} }, "model.layers.12": { "stored_tensors": {} }, "model.layers.13.input_layernorm": { "stored_tensors": { "model.layers.13.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.13.self_attn.q_proj": { "stored_tensors": { "model.layers.13.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.13.self_attn.k_proj": { "stored_tensors": { "model.layers.13.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.13.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.13.self_attn.v_proj": { "stored_tensors": { "model.layers.13.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.13.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.13.self_attn.o_proj": { "stored_tensors": { "model.layers.13.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.13.post_attention_layernorm": { "stored_tensors": { "model.layers.13.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.13.mlp.up_proj": { "stored_tensors": { "model.layers.13.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.13.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.13.mlp.gate_proj": { "stored_tensors": { "model.layers.13.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.13.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.13.mlp.down_proj": { "stored_tensors": { "model.layers.13.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.13.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.13.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.14.input_layernorm": { "stored_tensors": { "model.layers.14.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.14.self_attn.q_proj": { "stored_tensors": { "model.layers.14.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.14.self_attn.k_proj": { "stored_tensors": { "model.layers.14.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.14.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.14.self_attn.v_proj": { "stored_tensors": { "model.layers.14.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.14.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.14.self_attn.o_proj": { "stored_tensors": { "model.layers.14.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.14.post_attention_layernorm": { "stored_tensors": { "model.layers.14.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.14.mlp.up_proj": { "stored_tensors": { "model.layers.14.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.14.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.14.mlp.gate_proj": { "stored_tensors": { "model.layers.14.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.14.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.14.mlp.down_proj": { "stored_tensors": { "model.layers.14.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.14.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.14.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.15.input_layernorm": { "stored_tensors": { "model.layers.15.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.15.self_attn.q_proj": { "stored_tensors": { "model.layers.15.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.15.self_attn.k_proj": { "stored_tensors": { "model.layers.15.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.15.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.15.self_attn.v_proj": { "stored_tensors": { "model.layers.15.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.15.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.15.self_attn.o_proj": { "stored_tensors": { "model.layers.15.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.15.post_attention_layernorm": { "stored_tensors": { "model.layers.15.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.15.mlp.up_proj": { "stored_tensors": { "model.layers.15.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.15.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.15.mlp.gate_proj": { "stored_tensors": { "model.layers.15.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.15.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.15.mlp.down_proj": { "stored_tensors": { "model.layers.15.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.15.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.15.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.16.input_layernorm": { "stored_tensors": { "model.layers.16.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.16.self_attn.q_proj": { "stored_tensors": { "model.layers.16.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.16.self_attn.k_proj": { "stored_tensors": { "model.layers.16.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.16.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.16.self_attn.v_proj": { "stored_tensors": { "model.layers.16.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.16.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.16.self_attn.o_proj": { "stored_tensors": { "model.layers.16.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.16.post_attention_layernorm": { "stored_tensors": { "model.layers.16.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.16.mlp.up_proj": { "stored_tensors": { "model.layers.16.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.16.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.16.mlp.gate_proj": { "stored_tensors": { "model.layers.16.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.16.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.16.mlp.down_proj": { "stored_tensors": { "model.layers.16.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.16.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.16.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.17.input_layernorm": { "stored_tensors": { "model.layers.17.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.17.self_attn.q_proj": { "stored_tensors": { "model.layers.17.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.17.self_attn.k_proj": { "stored_tensors": { "model.layers.17.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.17.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.17.self_attn.v_proj": { "stored_tensors": { "model.layers.17.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.17.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.17.self_attn.o_proj": { "stored_tensors": { "model.layers.17.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.17.post_attention_layernorm": { "stored_tensors": { "model.layers.17.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.17.mlp.up_proj": { "stored_tensors": { "model.layers.17.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.17.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.17.mlp.gate_proj": { "stored_tensors": { "model.layers.17.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.17.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.17.mlp.down_proj": { "stored_tensors": { "model.layers.17.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.17.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.17.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.18": { "stored_tensors": {} }, "model.layers.19": { "stored_tensors": {} }, "model.layers.20": { "stored_tensors": {} }, "model.layers.21": { "stored_tensors": {} }, "model.layers.22": { "stored_tensors": {} }, "model.layers.23": { "stored_tensors": {} }, "model.layers.24.input_layernorm": { "stored_tensors": { "model.layers.24.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.24.self_attn.q_proj": { "stored_tensors": { "model.layers.24.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.24.self_attn.k_proj": { "stored_tensors": { "model.layers.24.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.24.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.24.self_attn.v_proj": { "stored_tensors": { "model.layers.24.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.24.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.24.self_attn.o_proj": { "stored_tensors": { "model.layers.24.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.24.post_attention_layernorm": { "stored_tensors": { "model.layers.24.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.24.mlp.up_proj": { "stored_tensors": { "model.layers.24.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.24.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.24.mlp.gate_proj": { "stored_tensors": { "model.layers.24.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.24.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.24.mlp.down_proj": { "stored_tensors": { "model.layers.24.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.24.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.24.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.25.input_layernorm": { "stored_tensors": { "model.layers.25.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.25.self_attn.q_proj": { "stored_tensors": { "model.layers.25.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.25.self_attn.k_proj": { "stored_tensors": { "model.layers.25.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.25.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.25.self_attn.v_proj": { "stored_tensors": { "model.layers.25.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.25.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.25.self_attn.o_proj": { "stored_tensors": { "model.layers.25.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.25.post_attention_layernorm": { "stored_tensors": { "model.layers.25.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.25.mlp.up_proj": { "stored_tensors": { "model.layers.25.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.25.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.25.mlp.gate_proj": { "stored_tensors": { "model.layers.25.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.25.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.25.mlp.down_proj": { "stored_tensors": { "model.layers.25.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.25.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.25.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.26.input_layernorm": { "stored_tensors": { "model.layers.26.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.26.self_attn.q_proj": { "stored_tensors": { "model.layers.26.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.26.self_attn.k_proj": { "stored_tensors": { "model.layers.26.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.26.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.26.self_attn.v_proj": { "stored_tensors": { "model.layers.26.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.26.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.26.self_attn.o_proj": { "stored_tensors": { "model.layers.26.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.26.post_attention_layernorm": { "stored_tensors": { "model.layers.26.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.26.mlp.up_proj": { "stored_tensors": { "model.layers.26.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.26.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.26.mlp.gate_proj": { "stored_tensors": { "model.layers.26.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.26.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.26.mlp.down_proj": { "stored_tensors": { "model.layers.26.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.26.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.26.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.27": { "stored_tensors": {} }, "model.layers.28": { "stored_tensors": {} }, "model.layers.29": { "stored_tensors": {} }, "model.layers.30": { "stored_tensors": {} }, "model.layers.31": { "stored_tensors": {} }, "model.layers.32.input_layernorm": { "stored_tensors": { "model.layers.32.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.32.self_attn.q_proj": { "stored_tensors": { "model.layers.32.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.32.self_attn.k_proj": { "stored_tensors": { "model.layers.32.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.32.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.32.self_attn.v_proj": { "stored_tensors": { "model.layers.32.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.32.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.32.self_attn.o_proj": { "stored_tensors": { "model.layers.32.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.32.post_attention_layernorm": { "stored_tensors": { "model.layers.32.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.32.mlp.up_proj": { "stored_tensors": { "model.layers.32.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.32.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.32.mlp.gate_proj": { "stored_tensors": { "model.layers.32.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.32.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.32.mlp.down_proj": { "stored_tensors": { "model.layers.32.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.32.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.32.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.33.input_layernorm": { "stored_tensors": { "model.layers.33.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.33.self_attn.q_proj": { "stored_tensors": { "model.layers.33.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.33.self_attn.k_proj": { "stored_tensors": { "model.layers.33.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.33.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.33.self_attn.v_proj": { "stored_tensors": { "model.layers.33.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.33.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.33.self_attn.o_proj": { "stored_tensors": { "model.layers.33.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.33.post_attention_layernorm": { "stored_tensors": { "model.layers.33.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.33.mlp.up_proj": { "stored_tensors": { "model.layers.33.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.33.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.33.mlp.gate_proj": { "stored_tensors": { "model.layers.33.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.33.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.33.mlp.down_proj": { "stored_tensors": { "model.layers.33.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.33.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.33.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.34.input_layernorm": { "stored_tensors": { "model.layers.34.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.34.self_attn.q_proj": { "stored_tensors": { "model.layers.34.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.34.self_attn.k_proj": { "stored_tensors": { "model.layers.34.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.34.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.34.self_attn.v_proj": { "stored_tensors": { "model.layers.34.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.34.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.34.self_attn.o_proj": { "stored_tensors": { "model.layers.34.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.34.post_attention_layernorm": { "stored_tensors": { "model.layers.34.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.34.mlp.up_proj": { "stored_tensors": { "model.layers.34.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.34.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.34.mlp.gate_proj": { "stored_tensors": { "model.layers.34.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.34.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.34.mlp.down_proj": { "stored_tensors": { "model.layers.34.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.34.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.34.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.35.input_layernorm": { "stored_tensors": { "model.layers.35.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.35.self_attn.q_proj": { "stored_tensors": { "model.layers.35.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.35.self_attn.k_proj": { "stored_tensors": { "model.layers.35.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.35.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.35.self_attn.v_proj": { "stored_tensors": { "model.layers.35.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.35.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.35.self_attn.o_proj": { "stored_tensors": { "model.layers.35.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.35.post_attention_layernorm": { "stored_tensors": { "model.layers.35.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.35.mlp.up_proj": { "stored_tensors": { "model.layers.35.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.35.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.35.mlp.gate_proj": { "stored_tensors": { "model.layers.35.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.35.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.35.mlp.down_proj": { "stored_tensors": { "model.layers.35.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.35.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.35.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.36": { "stored_tensors": {} }, "model.layers.37": { "stored_tensors": {} }, "model.layers.38": { "stored_tensors": {} }, "model.layers.39.input_layernorm": { "stored_tensors": { "model.layers.39.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.39.self_attn.q_proj": { "stored_tensors": { "model.layers.39.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.39.self_attn.k_proj": { "stored_tensors": { "model.layers.39.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.39.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.39.self_attn.v_proj": { "stored_tensors": { "model.layers.39.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.39.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.39.self_attn.o_proj": { "stored_tensors": { "model.layers.39.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.39.post_attention_layernorm": { "stored_tensors": { "model.layers.39.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.39.mlp.up_proj": { "stored_tensors": { "model.layers.39.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.39.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.39.mlp.gate_proj": { "stored_tensors": { "model.layers.39.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.39.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.39.mlp.down_proj": { "stored_tensors": { "model.layers.39.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.39.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.39.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.40.input_layernorm": { "stored_tensors": { "model.layers.40.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.40.self_attn.q_proj": { "stored_tensors": { "model.layers.40.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.40.self_attn.k_proj": { "stored_tensors": { "model.layers.40.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.40.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.40.self_attn.v_proj": { "stored_tensors": { "model.layers.40.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.40.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.40.self_attn.o_proj": { "stored_tensors": { "model.layers.40.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.40.post_attention_layernorm": { "stored_tensors": { "model.layers.40.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.40.mlp.up_proj": { "stored_tensors": { "model.layers.40.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.40.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.40.mlp.gate_proj": { "stored_tensors": { "model.layers.40.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.40.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.40.mlp.down_proj": { "stored_tensors": { "model.layers.40.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.40.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.40.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.41.input_layernorm": { "stored_tensors": { "model.layers.41.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.41.self_attn.q_proj": { "stored_tensors": { "model.layers.41.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.41.self_attn.k_proj": { "stored_tensors": { "model.layers.41.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.41.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.41.self_attn.v_proj": { "stored_tensors": { "model.layers.41.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.41.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.41.self_attn.o_proj": { "stored_tensors": { "model.layers.41.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.41.post_attention_layernorm": { "stored_tensors": { "model.layers.41.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.41.mlp.up_proj": { "stored_tensors": { "model.layers.41.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.41.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.41.mlp.gate_proj": { "stored_tensors": { "model.layers.41.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.41.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.41.mlp.down_proj": { "stored_tensors": { "model.layers.41.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.41.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.41.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.42.post_attention_layernorm": { "stored_tensors": { "model.layers.42.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.42.mlp.up_proj": { "stored_tensors": { "model.layers.42.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.42.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.42.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.42.mlp.gate_proj": { "stored_tensors": { "model.layers.42.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.42.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.42.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.42.mlp.down_proj": { "stored_tensors": { "model.layers.42.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.42.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.42.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.43.input_layernorm": { "stored_tensors": { "model.layers.43.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.43.self_attn.q_proj": { "stored_tensors": { "model.layers.43.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.43.self_attn.k_proj": { "stored_tensors": { "model.layers.43.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.43.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.43.self_attn.v_proj": { "stored_tensors": { "model.layers.43.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.43.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.43.self_attn.o_proj": { "stored_tensors": { "model.layers.43.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.43.post_attention_layernorm": { "stored_tensors": { "model.layers.43.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.43.mlp.up_proj": { "stored_tensors": { "model.layers.43.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.43.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.43.mlp.gate_proj": { "stored_tensors": { "model.layers.43.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.43.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.43.mlp.down_proj": { "stored_tensors": { "model.layers.43.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.43.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.43.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.44.post_attention_layernorm": { "stored_tensors": { "model.layers.44.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.44.mlp.up_proj": { "stored_tensors": { "model.layers.44.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.44.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.44.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.44.mlp.gate_proj": { "stored_tensors": { "model.layers.44.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.44.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.44.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.44.mlp.down_proj": { "stored_tensors": { "model.layers.44.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.44.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.44.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.45": { "stored_tensors": {} }, "model.layers.46": { "stored_tensors": {} }, "model.layers.47": { "stored_tensors": {} }, "model.layers.48": { "stored_tensors": {} }, "model.layers.49": { "stored_tensors": {} }, "model.layers.50.input_layernorm": { "stored_tensors": { "model.layers.50.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.50.self_attn.q_proj": { "stored_tensors": { "model.layers.50.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.50.self_attn.k_proj": { "stored_tensors": { "model.layers.50.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.50.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.50.self_attn.v_proj": { "stored_tensors": { "model.layers.50.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.50.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.50.self_attn.o_proj": { "stored_tensors": { "model.layers.50.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.50.post_attention_layernorm": { "stored_tensors": { "model.layers.50.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.50.mlp.up_proj": { "stored_tensors": { "model.layers.50.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.50.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.50.mlp.gate_proj": { "stored_tensors": { "model.layers.50.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.50.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.50.mlp.down_proj": { "stored_tensors": { "model.layers.50.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.50.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.50.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.51.input_layernorm": { "stored_tensors": { "model.layers.51.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.51.self_attn.q_proj": { "stored_tensors": { "model.layers.51.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.51.self_attn.k_proj": { "stored_tensors": { "model.layers.51.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.51.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.51.self_attn.v_proj": { "stored_tensors": { "model.layers.51.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.51.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.51.self_attn.o_proj": { "stored_tensors": { "model.layers.51.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.51.post_attention_layernorm": { "stored_tensors": { "model.layers.51.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.51.mlp.up_proj": { "stored_tensors": { "model.layers.51.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.51.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.51.mlp.gate_proj": { "stored_tensors": { "model.layers.51.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.51.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.51.mlp.down_proj": { "stored_tensors": { "model.layers.51.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.51.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.51.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.52.input_layernorm": { "stored_tensors": { "model.layers.52.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.52.self_attn.q_proj": { "stored_tensors": { "model.layers.52.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.52.self_attn.k_proj": { "stored_tensors": { "model.layers.52.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.52.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.52.self_attn.v_proj": { "stored_tensors": { "model.layers.52.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.52.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.52.self_attn.o_proj": { "stored_tensors": { "model.layers.52.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.52.post_attention_layernorm": { "stored_tensors": { "model.layers.52.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.52.mlp.up_proj": { "stored_tensors": { "model.layers.52.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.52.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.52.mlp.gate_proj": { "stored_tensors": { "model.layers.52.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.52.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.52.mlp.down_proj": { "stored_tensors": { "model.layers.52.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.52.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.52.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.53.input_layernorm": { "stored_tensors": { "model.layers.53.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.53.self_attn.q_proj": { "stored_tensors": { "model.layers.53.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.53.self_attn.k_proj": { "stored_tensors": { "model.layers.53.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.53.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.53.self_attn.v_proj": { "stored_tensors": { "model.layers.53.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.53.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.53.self_attn.o_proj": { "stored_tensors": { "model.layers.53.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.53.post_attention_layernorm": { "stored_tensors": { "model.layers.53.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.53.mlp.up_proj": { "stored_tensors": { "model.layers.53.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.53.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.53.mlp.gate_proj": { "stored_tensors": { "model.layers.53.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.53.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.53.mlp.down_proj": { "stored_tensors": { "model.layers.53.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.53.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.53.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.54": { "stored_tensors": {} }, "model.layers.55": { "stored_tensors": {} }, "model.layers.56": { "stored_tensors": {} }, "model.layers.57": { "stored_tensors": {} }, "model.layers.58": { "stored_tensors": {} }, "model.layers.59.input_layernorm": { "stored_tensors": { "model.layers.59.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.59.self_attn.q_proj": { "stored_tensors": { "model.layers.59.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.59.self_attn.k_proj": { "stored_tensors": { "model.layers.59.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.59.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.59.self_attn.v_proj": { "stored_tensors": { "model.layers.59.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.59.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.59.self_attn.o_proj": { "stored_tensors": { "model.layers.59.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.59.post_attention_layernorm": { "stored_tensors": { "model.layers.59.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.59.mlp.up_proj": { "stored_tensors": { "model.layers.59.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.59.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.59.mlp.gate_proj": { "stored_tensors": { "model.layers.59.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.59.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.59.mlp.down_proj": { "stored_tensors": { "model.layers.59.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.59.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.59.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.60.input_layernorm": { "stored_tensors": { "model.layers.60.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.60.self_attn.q_proj": { "stored_tensors": { "model.layers.60.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.60.self_attn.k_proj": { "stored_tensors": { "model.layers.60.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.60.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.60.self_attn.v_proj": { "stored_tensors": { "model.layers.60.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.60.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.60.self_attn.o_proj": { "stored_tensors": { "model.layers.60.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.60.post_attention_layernorm": { "stored_tensors": { "model.layers.60.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.60.mlp.up_proj": { "stored_tensors": { "model.layers.60.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.60.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.60.mlp.gate_proj": { "stored_tensors": { "model.layers.60.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.60.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.60.mlp.down_proj": { "stored_tensors": { "model.layers.60.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.60.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.60.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.61.input_layernorm": { "stored_tensors": { "model.layers.61.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.61.self_attn.q_proj": { "stored_tensors": { "model.layers.61.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.61.self_attn.k_proj": { "stored_tensors": { "model.layers.61.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.61.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.61.self_attn.v_proj": { "stored_tensors": { "model.layers.61.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.61.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.61.self_attn.o_proj": { "stored_tensors": { "model.layers.61.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.61.post_attention_layernorm": { "stored_tensors": { "model.layers.61.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.61.mlp.up_proj": { "stored_tensors": { "model.layers.61.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.61.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.61.mlp.gate_proj": { "stored_tensors": { "model.layers.61.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.61.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.61.mlp.down_proj": { "stored_tensors": { "model.layers.61.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.61.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.61.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.62.input_layernorm": { "stored_tensors": { "model.layers.62.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.62.self_attn.q_proj": { "stored_tensors": { "model.layers.62.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.62.self_attn.k_proj": { "stored_tensors": { "model.layers.62.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.62.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.62.self_attn.v_proj": { "stored_tensors": { "model.layers.62.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.62.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.62.self_attn.o_proj": { "stored_tensors": { "model.layers.62.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.62.post_attention_layernorm": { "stored_tensors": { "model.layers.62.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.62.mlp.up_proj": { "stored_tensors": { "model.layers.62.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.62.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.62.mlp.gate_proj": { "stored_tensors": { "model.layers.62.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.62.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.62.mlp.down_proj": { "stored_tensors": { "model.layers.62.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.62.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.62.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.63": { "stored_tensors": {} }, "model.layers.64": { "stored_tensors": {} }, "model.layers.65": { "stored_tensors": {} }, "model.layers.66": { "stored_tensors": {} }, "model.layers.67": { "stored_tensors": {} }, "model.layers.68.input_layernorm": { "stored_tensors": { "model.layers.68.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.68.self_attn.q_proj": { "stored_tensors": { "model.layers.68.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.68.self_attn.k_proj": { "stored_tensors": { "model.layers.68.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.68.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.68.self_attn.v_proj": { "stored_tensors": { "model.layers.68.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.68.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.68.self_attn.o_proj": { "stored_tensors": { "model.layers.68.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.68.post_attention_layernorm": { "stored_tensors": { "model.layers.68.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.68.mlp.up_proj": { "stored_tensors": { "model.layers.68.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.68.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.68.mlp.gate_proj": { "stored_tensors": { "model.layers.68.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.68.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.68.mlp.down_proj": { "stored_tensors": { "model.layers.68.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.68.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.68.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.69.input_layernorm": { "stored_tensors": { "model.layers.69.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.69.self_attn.q_proj": { "stored_tensors": { "model.layers.69.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.69.self_attn.k_proj": { "stored_tensors": { "model.layers.69.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.69.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.69.self_attn.v_proj": { "stored_tensors": { "model.layers.69.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.69.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.69.self_attn.o_proj": { "stored_tensors": { "model.layers.69.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.69.post_attention_layernorm": { "stored_tensors": { "model.layers.69.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.69.mlp.up_proj": { "stored_tensors": { "model.layers.69.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.69.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.69.mlp.gate_proj": { "stored_tensors": { "model.layers.69.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.69.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.69.mlp.down_proj": { "stored_tensors": { "model.layers.69.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.69.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.69.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.70.input_layernorm": { "stored_tensors": { "model.layers.70.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.70.self_attn.q_proj": { "stored_tensors": { "model.layers.70.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.70.self_attn.k_proj": { "stored_tensors": { "model.layers.70.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.70.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.70.self_attn.v_proj": { "stored_tensors": { "model.layers.70.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.70.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.70.self_attn.o_proj": { "stored_tensors": { "model.layers.70.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.70.post_attention_layernorm": { "stored_tensors": { "model.layers.70.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.70.mlp.up_proj": { "stored_tensors": { "model.layers.70.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.70.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.70.mlp.gate_proj": { "stored_tensors": { "model.layers.70.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.70.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.70.mlp.down_proj": { "stored_tensors": { "model.layers.70.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.70.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.70.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.71.input_layernorm": { "stored_tensors": { "model.layers.71.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.71.self_attn.q_proj": { "stored_tensors": { "model.layers.71.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.71.self_attn.k_proj": { "stored_tensors": { "model.layers.71.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.71.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.71.self_attn.v_proj": { "stored_tensors": { "model.layers.71.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.71.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.71.self_attn.o_proj": { "stored_tensors": { "model.layers.71.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.71.post_attention_layernorm": { "stored_tensors": { "model.layers.71.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.71.mlp.up_proj": { "stored_tensors": { "model.layers.71.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.71.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.71.mlp.gate_proj": { "stored_tensors": { "model.layers.71.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.71.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.71.mlp.down_proj": { "stored_tensors": { "model.layers.71.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.71.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.71.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.72": { "stored_tensors": {} }, "model.layers.73": { "stored_tensors": {} }, "model.layers.74": { "stored_tensors": {} }, "model.layers.75": { "stored_tensors": {} }, "model.layers.76": { "stored_tensors": {} }, "model.layers.77.input_layernorm": { "stored_tensors": { "model.layers.77.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.77.self_attn.q_proj": { "stored_tensors": { "model.layers.77.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.77.self_attn.k_proj": { "stored_tensors": { "model.layers.77.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.77.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.77.self_attn.v_proj": { "stored_tensors": { "model.layers.77.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.77.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.77.self_attn.o_proj": { "stored_tensors": { "model.layers.77.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.77.post_attention_layernorm": { "stored_tensors": { "model.layers.77.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.77.mlp.up_proj": { "stored_tensors": { "model.layers.77.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.77.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.77.mlp.gate_proj": { "stored_tensors": { "model.layers.77.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.77.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.77.mlp.down_proj": { "stored_tensors": { "model.layers.77.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.77.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.77.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.78.input_layernorm": { "stored_tensors": { "model.layers.78.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.78.self_attn.q_proj": { "stored_tensors": { "model.layers.78.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.78.self_attn.k_proj": { "stored_tensors": { "model.layers.78.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.78.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.78.self_attn.v_proj": { "stored_tensors": { "model.layers.78.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.78.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.78.self_attn.o_proj": { "stored_tensors": { "model.layers.78.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.78.post_attention_layernorm": { "stored_tensors": { "model.layers.78.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.78.mlp.up_proj": { "stored_tensors": { "model.layers.78.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.78.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.78.mlp.gate_proj": { "stored_tensors": { "model.layers.78.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.78.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.78.mlp.down_proj": { "stored_tensors": { "model.layers.78.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.78.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.78.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.79.input_layernorm": { "stored_tensors": { "model.layers.79.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.79.self_attn.q_proj": { "stored_tensors": { "model.layers.79.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.79.self_attn.k_proj": { "stored_tensors": { "model.layers.79.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.79.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.79.self_attn.v_proj": { "stored_tensors": { "model.layers.79.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.79.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.79.self_attn.o_proj": { "stored_tensors": { "model.layers.79.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.79.post_attention_layernorm": { "stored_tensors": { "model.layers.79.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.79.mlp.up_proj": { "stored_tensors": { "model.layers.79.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.79.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.79.mlp.gate_proj": { "stored_tensors": { "model.layers.79.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.79.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.79.mlp.down_proj": { "stored_tensors": { "model.layers.79.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.79.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.79.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.80.input_layernorm": { "stored_tensors": { "model.layers.80.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.80.self_attn.q_proj": { "stored_tensors": { "model.layers.80.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.80.self_attn.k_proj": { "stored_tensors": { "model.layers.80.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.80.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.80.self_attn.v_proj": { "stored_tensors": { "model.layers.80.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.80.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.80.self_attn.o_proj": { "stored_tensors": { "model.layers.80.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.80.post_attention_layernorm": { "stored_tensors": { "model.layers.80.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.80.mlp.up_proj": { "stored_tensors": { "model.layers.80.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.80.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.80.mlp.gate_proj": { "stored_tensors": { "model.layers.80.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.80.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.80.mlp.down_proj": { "stored_tensors": { "model.layers.80.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.80.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.80.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.81": { "stored_tensors": {} }, "model.layers.82": { "stored_tensors": {} }, "model.layers.83": { "stored_tensors": {} }, "model.layers.84": { "stored_tensors": {} }, "model.layers.85": { "stored_tensors": {} }, "model.layers.86.input_layernorm": { "stored_tensors": { "model.layers.86.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.86.self_attn.q_proj": { "stored_tensors": { "model.layers.86.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.86.self_attn.k_proj": { "stored_tensors": { "model.layers.86.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.86.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.86.self_attn.v_proj": { "stored_tensors": { "model.layers.86.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.86.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.86.self_attn.o_proj": { "stored_tensors": { "model.layers.86.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.86.post_attention_layernorm": { "stored_tensors": { "model.layers.86.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.86.mlp.up_proj": { "stored_tensors": { "model.layers.86.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.86.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.86.mlp.gate_proj": { "stored_tensors": { "model.layers.86.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.86.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.86.mlp.down_proj": { "stored_tensors": { "model.layers.86.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.86.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.86.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.87.input_layernorm": { "stored_tensors": { "model.layers.87.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.87.self_attn.q_proj": { "stored_tensors": { "model.layers.87.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.87.self_attn.k_proj": { "stored_tensors": { "model.layers.87.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.87.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.87.self_attn.v_proj": { "stored_tensors": { "model.layers.87.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.87.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.87.self_attn.o_proj": { "stored_tensors": { "model.layers.87.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.87.post_attention_layernorm": { "stored_tensors": { "model.layers.87.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.87.mlp.up_proj": { "stored_tensors": { "model.layers.87.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.mlp.up_proj.svh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.87.mlp.up_proj.trellis": { "shape": [ 1024, 2336, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.87.mlp.gate_proj": { "stored_tensors": { "model.layers.87.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.mlp.gate_proj.svh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.87.mlp.gate_proj.trellis": { "shape": [ 1024, 2336, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.87.mlp.down_proj": { "stored_tensors": { "model.layers.87.mlp.down_proj.suh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.87.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.87.mlp.down_proj.trellis": { "shape": [ 2336, 1024, 64 ], "n_bytes": 306184192, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.88.input_layernorm": { "stored_tensors": { "model.layers.88.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.88.self_attn.q_proj": { "stored_tensors": { "model.layers.88.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.88.self_attn.k_proj": { "stored_tensors": { "model.layers.88.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.88.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.88.self_attn.v_proj": { "stored_tensors": { "model.layers.88.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.88.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.88.self_attn.o_proj": { "stored_tensors": { "model.layers.88.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.88.post_attention_layernorm": { "stored_tensors": { "model.layers.88.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.88.mlp.up_proj": { "stored_tensors": { "model.layers.88.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.mlp.up_proj.svh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.88.mlp.up_proj.trellis": { "shape": [ 1024, 2336, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.88.mlp.gate_proj": { "stored_tensors": { "model.layers.88.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.mlp.gate_proj.svh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.88.mlp.gate_proj.trellis": { "shape": [ 1024, 2336, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.88.mlp.down_proj": { "stored_tensors": { "model.layers.88.mlp.down_proj.suh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.88.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.88.mlp.down_proj.trellis": { "shape": [ 2336, 1024, 64 ], "n_bytes": 306184192, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.89.input_layernorm": { "stored_tensors": { "model.layers.89.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.89.self_attn.q_proj": { "stored_tensors": { "model.layers.89.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.89.self_attn.k_proj": { "stored_tensors": { "model.layers.89.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.89.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.89.self_attn.v_proj": { "stored_tensors": { "model.layers.89.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.89.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.89.self_attn.o_proj": { "stored_tensors": { "model.layers.89.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.89.post_attention_layernorm": { "stored_tensors": { "model.layers.89.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.89.mlp.up_proj": { "stored_tensors": { "model.layers.89.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.mlp.up_proj.svh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.89.mlp.up_proj.trellis": { "shape": [ 1024, 2336, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.89.mlp.gate_proj": { "stored_tensors": { "model.layers.89.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.mlp.gate_proj.svh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.89.mlp.gate_proj.trellis": { "shape": [ 1024, 2336, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.89.mlp.down_proj": { "stored_tensors": { "model.layers.89.mlp.down_proj.suh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.89.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.89.mlp.down_proj.trellis": { "shape": [ 2336, 1024, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.90": { "stored_tensors": {} }, "model.layers.91": { "stored_tensors": {} }, "model.layers.92.input_layernorm": { "stored_tensors": { "model.layers.92.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.92.self_attn.q_proj": { "stored_tensors": { "model.layers.92.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.92.self_attn.k_proj": { "stored_tensors": { "model.layers.92.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.92.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.92.self_attn.v_proj": { "stored_tensors": { "model.layers.92.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.92.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.92.self_attn.o_proj": { "stored_tensors": { "model.layers.92.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.92.post_attention_layernorm": { "stored_tensors": { "model.layers.92.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.92.mlp.up_proj": { "stored_tensors": { "model.layers.92.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.mlp.up_proj.svh": { "shape": [ 32000 ], "n_bytes": 64000, "dtype": "torch.float16" }, "model.layers.92.mlp.up_proj.trellis": { "shape": [ 1024, 2000, 48 ], "n_bytes": 196608000, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.92.mlp.gate_proj": { "stored_tensors": { "model.layers.92.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.mlp.gate_proj.svh": { "shape": [ 32000 ], "n_bytes": 64000, "dtype": "torch.float16" }, "model.layers.92.mlp.gate_proj.trellis": { "shape": [ 1024, 2000, 48 ], "n_bytes": 196608000, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.92.mlp.down_proj": { "stored_tensors": { "model.layers.92.mlp.down_proj.suh": { "shape": [ 32000 ], "n_bytes": 64000, "dtype": "torch.float16" }, "model.layers.92.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.92.mlp.down_proj.trellis": { "shape": [ 2000, 1024, 64 ], "n_bytes": 262144000, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.93.input_layernorm": { "stored_tensors": { "model.layers.93.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.93.self_attn.q_proj": { "stored_tensors": { "model.layers.93.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.93.self_attn.k_proj": { "stored_tensors": { "model.layers.93.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.93.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.93.self_attn.v_proj": { "stored_tensors": { "model.layers.93.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.93.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.93.self_attn.o_proj": { "stored_tensors": { "model.layers.93.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.93.post_attention_layernorm": { "stored_tensors": { "model.layers.93.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.93.mlp.up_proj": { "stored_tensors": { "model.layers.93.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.93.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.93.mlp.gate_proj": { "stored_tensors": { "model.layers.93.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.93.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.93.mlp.down_proj": { "stored_tensors": { "model.layers.93.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.93.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.93.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.94.input_layernorm": { "stored_tensors": { "model.layers.94.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.94.self_attn.q_proj": { "stored_tensors": { "model.layers.94.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.94.self_attn.k_proj": { "stored_tensors": { "model.layers.94.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.94.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.94.self_attn.v_proj": { "stored_tensors": { "model.layers.94.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.94.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.94.self_attn.o_proj": { "stored_tensors": { "model.layers.94.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.94.post_attention_layernorm": { "stored_tensors": { "model.layers.94.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.94.mlp.up_proj": { "stored_tensors": { "model.layers.94.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.94.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.94.mlp.gate_proj": { "stored_tensors": { "model.layers.94.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.94.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.94.mlp.down_proj": { "stored_tensors": { "model.layers.94.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.94.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.94.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.95.input_layernorm": { "stored_tensors": { "model.layers.95.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.95.self_attn.q_proj": { "stored_tensors": { "model.layers.95.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.95.self_attn.k_proj": { "stored_tensors": { "model.layers.95.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.95.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.95.self_attn.v_proj": { "stored_tensors": { "model.layers.95.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.95.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.95.self_attn.o_proj": { "stored_tensors": { "model.layers.95.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.95.post_attention_layernorm": { "stored_tensors": { "model.layers.95.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.95.mlp.up_proj": { "stored_tensors": { "model.layers.95.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.95.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.95.mlp.gate_proj": { "stored_tensors": { "model.layers.95.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.95.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.95.mlp.down_proj": { "stored_tensors": { "model.layers.95.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.95.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.95.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.96.post_attention_layernorm": { "stored_tensors": { "model.layers.96.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.96.mlp.up_proj": { "stored_tensors": { "model.layers.96.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.96.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.96.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.96.mlp.gate_proj": { "stored_tensors": { "model.layers.96.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.96.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.96.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.96.mlp.down_proj": { "stored_tensors": { "model.layers.96.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.96.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.96.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.97.post_attention_layernorm": { "stored_tensors": { "model.layers.97.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.97.mlp.up_proj": { "stored_tensors": { "model.layers.97.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.97.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.97.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.97.mlp.gate_proj": { "stored_tensors": { "model.layers.97.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.97.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.97.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.97.mlp.down_proj": { "stored_tensors": { "model.layers.97.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.97.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.97.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.98.input_layernorm": { "stored_tensors": { "model.layers.98.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.98.self_attn.q_proj": { "stored_tensors": { "model.layers.98.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.98.self_attn.k_proj": { "stored_tensors": { "model.layers.98.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.98.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.98.self_attn.v_proj": { "stored_tensors": { "model.layers.98.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.98.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.98.self_attn.o_proj": { "stored_tensors": { "model.layers.98.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.98.post_attention_layernorm": { "stored_tensors": { "model.layers.98.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.98.mlp.up_proj": { "stored_tensors": { "model.layers.98.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.98.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.98.mlp.gate_proj": { "stored_tensors": { "model.layers.98.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.98.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.98.mlp.down_proj": { "stored_tensors": { "model.layers.98.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.98.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.98.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.99": { "stored_tensors": {} }, "model.layers.100.post_attention_layernorm": { "stored_tensors": { "model.layers.100.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.100.mlp.up_proj": { "stored_tensors": { "model.layers.100.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.100.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.100.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.100.mlp.gate_proj": { "stored_tensors": { "model.layers.100.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.100.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.100.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.100.mlp.down_proj": { "stored_tensors": { "model.layers.100.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.100.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.100.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.101.post_attention_layernorm": { "stored_tensors": { "model.layers.101.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.101.mlp.up_proj": { "stored_tensors": { "model.layers.101.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.101.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.101.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.101.mlp.gate_proj": { "stored_tensors": { "model.layers.101.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.101.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.101.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.101.mlp.down_proj": { "stored_tensors": { "model.layers.101.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.101.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.101.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.102.post_attention_layernorm": { "stored_tensors": { "model.layers.102.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.102.mlp.up_proj": { "stored_tensors": { "model.layers.102.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.102.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.102.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.102.mlp.gate_proj": { "stored_tensors": { "model.layers.102.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.102.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.102.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.102.mlp.down_proj": { "stored_tensors": { "model.layers.102.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.102.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.102.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.103.post_attention_layernorm": { "stored_tensors": { "model.layers.103.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.103.mlp.up_proj": { "stored_tensors": { "model.layers.103.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.103.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.103.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.103.mlp.gate_proj": { "stored_tensors": { "model.layers.103.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.103.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.103.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.103.mlp.down_proj": { "stored_tensors": { "model.layers.103.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.103.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.103.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.104.post_attention_layernorm": { "stored_tensors": { "model.layers.104.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.104.mlp.up_proj": { "stored_tensors": { "model.layers.104.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.104.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.104.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.104.mlp.gate_proj": { "stored_tensors": { "model.layers.104.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.104.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.104.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.104.mlp.down_proj": { "stored_tensors": { "model.layers.104.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.104.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.104.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.105.post_attention_layernorm": { "stored_tensors": { "model.layers.105.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.105.mlp.up_proj": { "stored_tensors": { "model.layers.105.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.105.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.105.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.105.mlp.gate_proj": { "stored_tensors": { "model.layers.105.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.105.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.105.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.105.mlp.down_proj": { "stored_tensors": { "model.layers.105.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.105.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.105.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.106.post_attention_layernorm": { "stored_tensors": { "model.layers.106.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.106.mlp.up_proj": { "stored_tensors": { "model.layers.106.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.106.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.106.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.106.mlp.gate_proj": { "stored_tensors": { "model.layers.106.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.106.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.106.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.106.mlp.down_proj": { "stored_tensors": { "model.layers.106.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.106.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.106.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.107.input_layernorm": { "stored_tensors": { "model.layers.107.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.107.self_attn.q_proj": { "stored_tensors": { "model.layers.107.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.107.self_attn.k_proj": { "stored_tensors": { "model.layers.107.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.107.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.107.self_attn.v_proj": { "stored_tensors": { "model.layers.107.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.107.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.107.self_attn.o_proj": { "stored_tensors": { "model.layers.107.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.107.post_attention_layernorm": { "stored_tensors": { "model.layers.107.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.107.mlp.up_proj": { "stored_tensors": { "model.layers.107.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.107.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.107.mlp.gate_proj": { "stored_tensors": { "model.layers.107.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.107.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.107.mlp.down_proj": { "stored_tensors": { "model.layers.107.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.107.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.107.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.108": { "stored_tensors": {} }, "model.layers.109": { "stored_tensors": {} }, "model.layers.110": { "stored_tensors": {} }, "model.layers.111": { "stored_tensors": {} }, "model.layers.112": { "stored_tensors": {} }, "model.layers.113.input_layernorm": { "stored_tensors": { "model.layers.113.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.113.self_attn.q_proj": { "stored_tensors": { "model.layers.113.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.113.self_attn.k_proj": { "stored_tensors": { "model.layers.113.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.113.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.113.self_attn.v_proj": { "stored_tensors": { "model.layers.113.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.113.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.113.self_attn.o_proj": { "stored_tensors": { "model.layers.113.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.113.post_attention_layernorm": { "stored_tensors": { "model.layers.113.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.113.mlp.up_proj": { "stored_tensors": { "model.layers.113.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.mlp.up_proj.svh": { "shape": [ 32000 ], "n_bytes": 64000, "dtype": "torch.float16" }, "model.layers.113.mlp.up_proj.trellis": { "shape": [ 1024, 2000, 48 ], "n_bytes": 196608000, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.113.mlp.gate_proj": { "stored_tensors": { "model.layers.113.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.mlp.gate_proj.svh": { "shape": [ 32000 ], "n_bytes": 64000, "dtype": "torch.float16" }, "model.layers.113.mlp.gate_proj.trellis": { "shape": [ 1024, 2000, 48 ], "n_bytes": 196608000, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.113.mlp.down_proj": { "stored_tensors": { "model.layers.113.mlp.down_proj.suh": { "shape": [ 32000 ], "n_bytes": 64000, "dtype": "torch.float16" }, "model.layers.113.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.113.mlp.down_proj.trellis": { "shape": [ 2000, 1024, 64 ], "n_bytes": 262144000, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.114.input_layernorm": { "stored_tensors": { "model.layers.114.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.114.self_attn.q_proj": { "stored_tensors": { "model.layers.114.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.114.self_attn.k_proj": { "stored_tensors": { "model.layers.114.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.114.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.114.self_attn.v_proj": { "stored_tensors": { "model.layers.114.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.114.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.114.self_attn.o_proj": { "stored_tensors": { "model.layers.114.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.114.post_attention_layernorm": { "stored_tensors": { "model.layers.114.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.114.mlp.up_proj": { "stored_tensors": { "model.layers.114.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.114.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.114.mlp.gate_proj": { "stored_tensors": { "model.layers.114.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.114.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.114.mlp.down_proj": { "stored_tensors": { "model.layers.114.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.114.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.114.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.115.post_attention_layernorm": { "stored_tensors": { "model.layers.115.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.115.mlp.up_proj": { "stored_tensors": { "model.layers.115.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.115.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.115.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.115.mlp.gate_proj": { "stored_tensors": { "model.layers.115.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.115.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.115.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.115.mlp.down_proj": { "stored_tensors": { "model.layers.115.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.115.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.115.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.116.input_layernorm": { "stored_tensors": { "model.layers.116.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.116.self_attn.q_proj": { "stored_tensors": { "model.layers.116.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.116.self_attn.k_proj": { "stored_tensors": { "model.layers.116.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.116.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.116.self_attn.v_proj": { "stored_tensors": { "model.layers.116.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.116.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.116.self_attn.o_proj": { "stored_tensors": { "model.layers.116.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.116.post_attention_layernorm": { "stored_tensors": { "model.layers.116.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.116.mlp.up_proj": { "stored_tensors": { "model.layers.116.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.116.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.116.mlp.gate_proj": { "stored_tensors": { "model.layers.116.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.116.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.116.mlp.down_proj": { "stored_tensors": { "model.layers.116.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.116.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.116.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.117": { "stored_tensors": {} }, "model.layers.118": { "stored_tensors": {} }, "model.layers.119": { "stored_tensors": {} }, "model.layers.120": { "stored_tensors": {} }, "model.layers.121": { "stored_tensors": {} }, "model.layers.122": { "stored_tensors": {} }, "model.layers.123": { "stored_tensors": {} }, "model.layers.124": { "stored_tensors": {} }, "model.layers.125.post_attention_layernorm": { "stored_tensors": { "model.layers.125.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.125.mlp.up_proj.slice.0": { "stored_tensors": { "model.layers.125.mlp.up_proj.slice.0.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.0.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.0.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.gate_proj.slice.0": { "stored_tensors": { "model.layers.125.mlp.gate_proj.slice.0.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.0.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.0.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.down_proj.slice.0": { "stored_tensors": { "model.layers.125.mlp.down_proj.slice.0.suh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.0.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.0.trellis": { "shape": [ 3120, 1024, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.up_proj.slice.1": { "stored_tensors": { "model.layers.125.mlp.up_proj.slice.1.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.1.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.1.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.gate_proj.slice.1": { "stored_tensors": { "model.layers.125.mlp.gate_proj.slice.1.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.1.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.1.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.down_proj.slice.1": { "stored_tensors": { "model.layers.125.mlp.down_proj.slice.1.suh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.1.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.1.trellis": { "shape": [ 3120, 1024, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.up_proj.slice.2": { "stored_tensors": { "model.layers.125.mlp.up_proj.slice.2.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.2.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.2.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.gate_proj.slice.2": { "stored_tensors": { "model.layers.125.mlp.gate_proj.slice.2.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.2.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.2.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.down_proj.slice.2": { "stored_tensors": { "model.layers.125.mlp.down_proj.slice.2.suh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.2.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.2.trellis": { "shape": [ 3120, 1024, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.up_proj.slice.3": { "stored_tensors": { "model.layers.125.mlp.up_proj.slice.3.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.3.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.3.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.gate_proj.slice.3": { "stored_tensors": { "model.layers.125.mlp.gate_proj.slice.3.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.3.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.3.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.down_proj.slice.3": { "stored_tensors": { "model.layers.125.mlp.down_proj.slice.3.suh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.3.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.3.trellis": { "shape": [ 3120, 1024, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.up_proj.slice.4": { "stored_tensors": { "model.layers.125.mlp.up_proj.slice.4.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.4.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.4.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.gate_proj.slice.4": { "stored_tensors": { "model.layers.125.mlp.gate_proj.slice.4.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.4.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.4.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.down_proj.slice.4": { "stored_tensors": { "model.layers.125.mlp.down_proj.slice.4.suh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.4.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.4.trellis": { "shape": [ 3120, 1024, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.up_proj.slice.5": { "stored_tensors": { "model.layers.125.mlp.up_proj.slice.5.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.5.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.5.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.gate_proj.slice.5": { "stored_tensors": { "model.layers.125.mlp.gate_proj.slice.5.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.5.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.5.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.down_proj.slice.5": { "stored_tensors": { "model.layers.125.mlp.down_proj.slice.5.suh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.5.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.5.trellis": { "shape": [ 3120, 1024, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.up_proj.slice.6": { "stored_tensors": { "model.layers.125.mlp.up_proj.slice.6.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.6.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.6.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.gate_proj.slice.6": { "stored_tensors": { "model.layers.125.mlp.gate_proj.slice.6.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.6.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.6.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.down_proj.slice.6": { "stored_tensors": { "model.layers.125.mlp.down_proj.slice.6.suh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.6.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.6.trellis": { "shape": [ 3120, 1024, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.up_proj.slice.7": { "stored_tensors": { "model.layers.125.mlp.up_proj.slice.7.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.7.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.up_proj.slice.7.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.gate_proj.slice.7": { "stored_tensors": { "model.layers.125.mlp.gate_proj.slice.7.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.7.svh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.gate_proj.slice.7.trellis": { "shape": [ 1024, 3120, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.125.mlp.down_proj.slice.7": { "stored_tensors": { "model.layers.125.mlp.down_proj.slice.7.suh": { "shape": [ 49920 ], "n_bytes": 99840, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.7.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.125.mlp.down_proj.slice.7.trellis": { "shape": [ 3120, 1024, 48 ], "n_bytes": 306708480, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.126": { "stored_tensors": {} }, "model.layers.127": { "stored_tensors": {} }, "model.layers.128": { "stored_tensors": {} }, "model.layers.129": { "stored_tensors": {} }, "model.layers.130": { "stored_tensors": {} }, "model.layers.131": { "stored_tensors": {} }, "model.layers.132": { "stored_tensors": {} }, "model.layers.133": { "stored_tensors": {} }, "model.layers.134.post_attention_layernorm": { "stored_tensors": { "model.layers.134.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.134.mlp.up_proj.slice.0": { "stored_tensors": { "model.layers.134.mlp.up_proj.slice.0.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.0.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.0.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.gate_proj.slice.0": { "stored_tensors": { "model.layers.134.mlp.gate_proj.slice.0.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.0.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.0.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.down_proj.slice.0": { "stored_tensors": { "model.layers.134.mlp.down_proj.slice.0.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.0.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.0.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.134.mlp.up_proj.slice.1": { "stored_tensors": { "model.layers.134.mlp.up_proj.slice.1.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.1.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.1.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.gate_proj.slice.1": { "stored_tensors": { "model.layers.134.mlp.gate_proj.slice.1.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.1.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.1.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.down_proj.slice.1": { "stored_tensors": { "model.layers.134.mlp.down_proj.slice.1.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.1.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.1.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.134.mlp.up_proj.slice.2": { "stored_tensors": { "model.layers.134.mlp.up_proj.slice.2.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.2.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.2.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.gate_proj.slice.2": { "stored_tensors": { "model.layers.134.mlp.gate_proj.slice.2.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.2.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.2.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.down_proj.slice.2": { "stored_tensors": { "model.layers.134.mlp.down_proj.slice.2.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.2.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.2.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.134.mlp.up_proj.slice.3": { "stored_tensors": { "model.layers.134.mlp.up_proj.slice.3.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.3.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.3.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.gate_proj.slice.3": { "stored_tensors": { "model.layers.134.mlp.gate_proj.slice.3.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.3.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.3.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.down_proj.slice.3": { "stored_tensors": { "model.layers.134.mlp.down_proj.slice.3.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.3.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.3.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.134.mlp.up_proj.slice.4": { "stored_tensors": { "model.layers.134.mlp.up_proj.slice.4.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.4.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.4.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.gate_proj.slice.4": { "stored_tensors": { "model.layers.134.mlp.gate_proj.slice.4.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.4.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.4.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.down_proj.slice.4": { "stored_tensors": { "model.layers.134.mlp.down_proj.slice.4.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.4.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.4.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.134.mlp.up_proj.slice.5": { "stored_tensors": { "model.layers.134.mlp.up_proj.slice.5.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.5.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.5.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.gate_proj.slice.5": { "stored_tensors": { "model.layers.134.mlp.gate_proj.slice.5.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.5.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.5.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.down_proj.slice.5": { "stored_tensors": { "model.layers.134.mlp.down_proj.slice.5.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.5.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.5.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.134.mlp.up_proj.slice.6": { "stored_tensors": { "model.layers.134.mlp.up_proj.slice.6.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.6.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.6.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.gate_proj.slice.6": { "stored_tensors": { "model.layers.134.mlp.gate_proj.slice.6.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.6.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.6.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.down_proj.slice.6": { "stored_tensors": { "model.layers.134.mlp.down_proj.slice.6.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.6.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.6.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.134.mlp.up_proj.slice.7": { "stored_tensors": { "model.layers.134.mlp.up_proj.slice.7.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.7.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.up_proj.slice.7.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.gate_proj.slice.7": { "stored_tensors": { "model.layers.134.mlp.gate_proj.slice.7.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.7.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.gate_proj.slice.7.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.134.mlp.down_proj.slice.7": { "stored_tensors": { "model.layers.134.mlp.down_proj.slice.7.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.7.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.134.mlp.down_proj.slice.7.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.135": { "stored_tensors": {} }, "model.layers.136": { "stored_tensors": {} }, "model.layers.137": { "stored_tensors": {} }, "model.layers.138": { "stored_tensors": {} }, "model.layers.139": { "stored_tensors": {} }, "model.layers.140": { "stored_tensors": {} }, "model.layers.141": { "stored_tensors": {} }, "model.layers.142": { "stored_tensors": {} }, "model.layers.143.post_attention_layernorm": { "stored_tensors": { "model.layers.143.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.143.mlp.up_proj.slice.0": { "stored_tensors": { "model.layers.143.mlp.up_proj.slice.0.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.0.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.0.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.gate_proj.slice.0": { "stored_tensors": { "model.layers.143.mlp.gate_proj.slice.0.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.0.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.0.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.down_proj.slice.0": { "stored_tensors": { "model.layers.143.mlp.down_proj.slice.0.suh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.0.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.0.trellis": { "shape": [ 3056, 1024, 64 ], "n_bytes": 400556032, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.143.mlp.up_proj.slice.1": { "stored_tensors": { "model.layers.143.mlp.up_proj.slice.1.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.1.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.1.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.gate_proj.slice.1": { "stored_tensors": { "model.layers.143.mlp.gate_proj.slice.1.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.1.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.1.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.down_proj.slice.1": { "stored_tensors": { "model.layers.143.mlp.down_proj.slice.1.suh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.1.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.1.trellis": { "shape": [ 3056, 1024, 64 ], "n_bytes": 400556032, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.143.mlp.up_proj.slice.2": { "stored_tensors": { "model.layers.143.mlp.up_proj.slice.2.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.2.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.2.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.gate_proj.slice.2": { "stored_tensors": { "model.layers.143.mlp.gate_proj.slice.2.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.2.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.2.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.down_proj.slice.2": { "stored_tensors": { "model.layers.143.mlp.down_proj.slice.2.suh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.2.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.2.trellis": { "shape": [ 3056, 1024, 64 ], "n_bytes": 400556032, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.143.mlp.up_proj.slice.3": { "stored_tensors": { "model.layers.143.mlp.up_proj.slice.3.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.3.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.3.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.gate_proj.slice.3": { "stored_tensors": { "model.layers.143.mlp.gate_proj.slice.3.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.3.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.3.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.down_proj.slice.3": { "stored_tensors": { "model.layers.143.mlp.down_proj.slice.3.suh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.3.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.3.trellis": { "shape": [ 3056, 1024, 64 ], "n_bytes": 400556032, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.143.mlp.up_proj.slice.4": { "stored_tensors": { "model.layers.143.mlp.up_proj.slice.4.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.4.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.4.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.gate_proj.slice.4": { "stored_tensors": { "model.layers.143.mlp.gate_proj.slice.4.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.4.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.4.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.down_proj.slice.4": { "stored_tensors": { "model.layers.143.mlp.down_proj.slice.4.suh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.4.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.4.trellis": { "shape": [ 3056, 1024, 64 ], "n_bytes": 400556032, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.143.mlp.up_proj.slice.5": { "stored_tensors": { "model.layers.143.mlp.up_proj.slice.5.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.5.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.5.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.gate_proj.slice.5": { "stored_tensors": { "model.layers.143.mlp.gate_proj.slice.5.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.5.svh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.5.trellis": { "shape": [ 1024, 3056, 48 ], "n_bytes": 300417024, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.down_proj.slice.5": { "stored_tensors": { "model.layers.143.mlp.down_proj.slice.5.suh": { "shape": [ 48896 ], "n_bytes": 97792, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.5.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.5.trellis": { "shape": [ 3056, 1024, 64 ], "n_bytes": 400556032, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.143.mlp.up_proj.slice.6": { "stored_tensors": { "model.layers.143.mlp.up_proj.slice.6.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.6.svh": { "shape": [ 49664 ], "n_bytes": 99328, "dtype": "torch.float16" }, "model.layers.143.mlp.up_proj.slice.6.trellis": { "shape": [ 1024, 3104, 48 ], "n_bytes": 305135616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.gate_proj.slice.6": { "stored_tensors": { "model.layers.143.mlp.gate_proj.slice.6.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.6.svh": { "shape": [ 49664 ], "n_bytes": 99328, "dtype": "torch.float16" }, "model.layers.143.mlp.gate_proj.slice.6.trellis": { "shape": [ 1024, 3104, 48 ], "n_bytes": 305135616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.143.mlp.down_proj.slice.6": { "stored_tensors": { "model.layers.143.mlp.down_proj.slice.6.suh": { "shape": [ 49664 ], "n_bytes": 99328, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.6.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.143.mlp.down_proj.slice.6.trellis": { "shape": [ 3104, 1024, 64 ], "n_bytes": 406847488, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.144": { "stored_tensors": {} }, "model.layers.145": { "stored_tensors": {} }, "model.layers.146": { "stored_tensors": {} }, "model.layers.147": { "stored_tensors": {} }, "model.layers.148": { "stored_tensors": {} }, "model.layers.149.post_attention_layernorm": { "stored_tensors": { "model.layers.149.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.149.mlp.up_proj.slice.0": { "stored_tensors": { "model.layers.149.mlp.up_proj.slice.0.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.0.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.0.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.gate_proj.slice.0": { "stored_tensors": { "model.layers.149.mlp.gate_proj.slice.0.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.0.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.0.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.down_proj.slice.0": { "stored_tensors": { "model.layers.149.mlp.down_proj.slice.0.suh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.0.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.0.trellis": { "shape": [ 3136, 1024, 64 ], "n_bytes": 411041792, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.149.mlp.up_proj.slice.1": { "stored_tensors": { "model.layers.149.mlp.up_proj.slice.1.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.1.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.1.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.gate_proj.slice.1": { "stored_tensors": { "model.layers.149.mlp.gate_proj.slice.1.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.1.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.1.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.down_proj.slice.1": { "stored_tensors": { "model.layers.149.mlp.down_proj.slice.1.suh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.1.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.1.trellis": { "shape": [ 3136, 1024, 64 ], "n_bytes": 411041792, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.149.mlp.up_proj.slice.2": { "stored_tensors": { "model.layers.149.mlp.up_proj.slice.2.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.2.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.2.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.gate_proj.slice.2": { "stored_tensors": { "model.layers.149.mlp.gate_proj.slice.2.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.2.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.2.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.down_proj.slice.2": { "stored_tensors": { "model.layers.149.mlp.down_proj.slice.2.suh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.2.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.2.trellis": { "shape": [ 3136, 1024, 64 ], "n_bytes": 411041792, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.149.mlp.up_proj.slice.3": { "stored_tensors": { "model.layers.149.mlp.up_proj.slice.3.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.3.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.3.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.gate_proj.slice.3": { "stored_tensors": { "model.layers.149.mlp.gate_proj.slice.3.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.3.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.3.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.down_proj.slice.3": { "stored_tensors": { "model.layers.149.mlp.down_proj.slice.3.suh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.3.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.3.trellis": { "shape": [ 3136, 1024, 64 ], "n_bytes": 411041792, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.149.mlp.up_proj.slice.4": { "stored_tensors": { "model.layers.149.mlp.up_proj.slice.4.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.4.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.4.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.gate_proj.slice.4": { "stored_tensors": { "model.layers.149.mlp.gate_proj.slice.4.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.4.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.4.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.down_proj.slice.4": { "stored_tensors": { "model.layers.149.mlp.down_proj.slice.4.suh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.4.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.4.trellis": { "shape": [ 3136, 1024, 64 ], "n_bytes": 411041792, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.149.mlp.up_proj.slice.5": { "stored_tensors": { "model.layers.149.mlp.up_proj.slice.5.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.5.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.up_proj.slice.5.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.gate_proj.slice.5": { "stored_tensors": { "model.layers.149.mlp.gate_proj.slice.5.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.5.svh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.gate_proj.slice.5.trellis": { "shape": [ 1024, 3136, 48 ], "n_bytes": 308281344, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.149.mlp.down_proj.slice.5": { "stored_tensors": { "model.layers.149.mlp.down_proj.slice.5.suh": { "shape": [ 50176 ], "n_bytes": 100352, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.5.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.149.mlp.down_proj.slice.5.trellis": { "shape": [ 3136, 1024, 64 ], "n_bytes": 411041792, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.150.post_attention_layernorm": { "stored_tensors": { "model.layers.150.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.150.mlp.up_proj": { "stored_tensors": { "model.layers.150.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.150.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.150.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.150.mlp.gate_proj": { "stored_tensors": { "model.layers.150.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.150.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.150.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.150.mlp.down_proj": { "stored_tensors": { "model.layers.150.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.150.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.150.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.151.input_layernorm": { "stored_tensors": { "model.layers.151.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.151.self_attn.q_proj": { "stored_tensors": { "model.layers.151.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.151.self_attn.k_proj": { "stored_tensors": { "model.layers.151.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.151.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.151.self_attn.v_proj": { "stored_tensors": { "model.layers.151.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.151.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.151.self_attn.o_proj": { "stored_tensors": { "model.layers.151.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.151.post_attention_layernorm": { "stored_tensors": { "model.layers.151.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.151.mlp.up_proj": { "stored_tensors": { "model.layers.151.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.mlp.up_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.151.mlp.up_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.151.mlp.gate_proj": { "stored_tensors": { "model.layers.151.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.mlp.gate_proj.svh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.151.mlp.gate_proj.trellis": { "shape": [ 1024, 1344, 48 ], "n_bytes": 132120576, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.151.mlp.down_proj": { "stored_tensors": { "model.layers.151.mlp.down_proj.suh": { "shape": [ 21504 ], "n_bytes": 43008, "dtype": "torch.float16" }, "model.layers.151.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.151.mlp.down_proj.trellis": { "shape": [ 1344, 1024, 64 ], "n_bytes": 176160768, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.152.input_layernorm": { "stored_tensors": { "model.layers.152.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.152.self_attn.q_proj": { "stored_tensors": { "model.layers.152.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.152.self_attn.k_proj": { "stored_tensors": { "model.layers.152.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.152.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.152.self_attn.v_proj": { "stored_tensors": { "model.layers.152.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.152.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.152.self_attn.o_proj": { "stored_tensors": { "model.layers.152.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.152.post_attention_layernorm": { "stored_tensors": { "model.layers.152.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.152.mlp.up_proj": { "stored_tensors": { "model.layers.152.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.152.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.152.mlp.gate_proj": { "stored_tensors": { "model.layers.152.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.152.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.152.mlp.down_proj": { "stored_tensors": { "model.layers.152.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.152.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.152.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.153": { "stored_tensors": {} }, "model.layers.154.input_layernorm": { "stored_tensors": { "model.layers.154.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.154.self_attn.q_proj": { "stored_tensors": { "model.layers.154.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.154.self_attn.k_proj": { "stored_tensors": { "model.layers.154.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.154.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.154.self_attn.v_proj": { "stored_tensors": { "model.layers.154.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.154.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.154.self_attn.o_proj": { "stored_tensors": { "model.layers.154.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.154.post_attention_layernorm": { "stored_tensors": { "model.layers.154.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.154.mlp.up_proj": { "stored_tensors": { "model.layers.154.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.154.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.154.mlp.gate_proj": { "stored_tensors": { "model.layers.154.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.154.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.154.mlp.down_proj": { "stored_tensors": { "model.layers.154.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.154.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.154.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.155.input_layernorm": { "stored_tensors": { "model.layers.155.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.155.self_attn.q_proj": { "stored_tensors": { "model.layers.155.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.155.self_attn.k_proj": { "stored_tensors": { "model.layers.155.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.155.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.155.self_attn.v_proj": { "stored_tensors": { "model.layers.155.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.155.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.155.self_attn.o_proj": { "stored_tensors": { "model.layers.155.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.155.post_attention_layernorm": { "stored_tensors": { "model.layers.155.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.155.mlp.up_proj": { "stored_tensors": { "model.layers.155.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.155.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.155.mlp.gate_proj": { "stored_tensors": { "model.layers.155.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.155.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.155.mlp.down_proj": { "stored_tensors": { "model.layers.155.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.155.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.155.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 64 ], "n_bytes": 218103808, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.156.input_layernorm": { "stored_tensors": { "model.layers.156.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.156.self_attn.q_proj": { "stored_tensors": { "model.layers.156.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.156.self_attn.k_proj": { "stored_tensors": { "model.layers.156.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.156.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.156.self_attn.v_proj": { "stored_tensors": { "model.layers.156.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.156.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.156.self_attn.o_proj": { "stored_tensors": { "model.layers.156.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.156.post_attention_layernorm": { "stored_tensors": { "model.layers.156.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.156.mlp.up_proj": { "stored_tensors": { "model.layers.156.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.mlp.up_proj.svh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.156.mlp.up_proj.trellis": { "shape": [ 1024, 2336, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.156.mlp.gate_proj": { "stored_tensors": { "model.layers.156.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.mlp.gate_proj.svh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.156.mlp.gate_proj.trellis": { "shape": [ 1024, 2336, 48 ], "n_bytes": 229638144, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.156.mlp.down_proj": { "stored_tensors": { "model.layers.156.mlp.down_proj.suh": { "shape": [ 37376 ], "n_bytes": 74752, "dtype": "torch.float16" }, "model.layers.156.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.156.mlp.down_proj.trellis": { "shape": [ 2336, 1024, 64 ], "n_bytes": 306184192, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.157.input_layernorm": { "stored_tensors": { "model.layers.157.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.157.self_attn.q_proj": { "stored_tensors": { "model.layers.157.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.157.self_attn.k_proj": { "stored_tensors": { "model.layers.157.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.157.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.157.self_attn.v_proj": { "stored_tensors": { "model.layers.157.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.157.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.157.self_attn.o_proj": { "stored_tensors": { "model.layers.157.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.157.post_attention_layernorm": { "stored_tensors": { "model.layers.157.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.157.mlp.up_proj": { "stored_tensors": { "model.layers.157.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.157.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.157.mlp.gate_proj": { "stored_tensors": { "model.layers.157.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.157.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.157.mlp.down_proj": { "stored_tensors": { "model.layers.157.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.157.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.157.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.158.input_layernorm": { "stored_tensors": { "model.layers.158.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.158.self_attn.q_proj": { "stored_tensors": { "model.layers.158.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.158.self_attn.k_proj": { "stored_tensors": { "model.layers.158.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.158.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.158.self_attn.v_proj": { "stored_tensors": { "model.layers.158.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.158.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.158.self_attn.o_proj": { "stored_tensors": { "model.layers.158.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.158.post_attention_layernorm": { "stored_tensors": { "model.layers.158.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.158.mlp.up_proj": { "stored_tensors": { "model.layers.158.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.158.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.158.mlp.gate_proj": { "stored_tensors": { "model.layers.158.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.158.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.158.mlp.down_proj": { "stored_tensors": { "model.layers.158.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.158.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.158.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.159.input_layernorm": { "stored_tensors": { "model.layers.159.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.159.self_attn.q_proj": { "stored_tensors": { "model.layers.159.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.159.self_attn.k_proj": { "stored_tensors": { "model.layers.159.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.159.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 48 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.159.self_attn.v_proj": { "stored_tensors": { "model.layers.159.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.159.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 64 ], "n_bytes": 8388608, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.159.self_attn.o_proj": { "stored_tensors": { "model.layers.159.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 48 ], "n_bytes": 100663296, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.159.post_attention_layernorm": { "stored_tensors": { "model.layers.159.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.159.mlp.up_proj": { "stored_tensors": { "model.layers.159.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.159.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.159.mlp.gate_proj": { "stored_tensors": { "model.layers.159.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.159.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.159.mlp.down_proj": { "stored_tensors": { "model.layers.159.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.159.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.159.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 64 ], "n_bytes": 436207616, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.160.input_layernorm": { "stored_tensors": { "model.layers.160.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.160.self_attn.q_proj": { "stored_tensors": { "model.layers.160.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.160.self_attn.k_proj": { "stored_tensors": { "model.layers.160.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.160.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.160.self_attn.v_proj": { "stored_tensors": { "model.layers.160.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.160.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.160.self_attn.o_proj": { "stored_tensors": { "model.layers.160.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.160.post_attention_layernorm": { "stored_tensors": { "model.layers.160.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.160.mlp.up_proj": { "stored_tensors": { "model.layers.160.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.mlp.up_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.160.mlp.up_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.160.mlp.gate_proj": { "stored_tensors": { "model.layers.160.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.mlp.gate_proj.svh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.160.mlp.gate_proj.trellis": { "shape": [ 1024, 3328, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.160.mlp.down_proj": { "stored_tensors": { "model.layers.160.mlp.down_proj.suh": { "shape": [ 53248 ], "n_bytes": 106496, "dtype": "torch.float16" }, "model.layers.160.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.160.mlp.down_proj.trellis": { "shape": [ 3328, 1024, 48 ], "n_bytes": 327155712, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.161.input_layernorm": { "stored_tensors": { "model.layers.161.input_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.161.self_attn.q_proj": { "stored_tensors": { "model.layers.161.self_attn.q_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.self_attn.q_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.self_attn.q_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.161.self_attn.k_proj": { "stored_tensors": { "model.layers.161.self_attn.k_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.self_attn.k_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.161.self_attn.k_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.161.self_attn.v_proj": { "stored_tensors": { "model.layers.161.self_attn.v_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.self_attn.v_proj.svh": { "shape": [ 1024 ], "n_bytes": 2048, "dtype": "torch.float16" }, "model.layers.161.self_attn.v_proj.trellis": { "shape": [ 1024, 64, 80 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 5 }, "model.layers.161.self_attn.o_proj": { "stored_tensors": { "model.layers.161.self_attn.o_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.self_attn.o_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.self_attn.o_proj.trellis": { "shape": [ 1024, 1024, 64 ], "n_bytes": 134217728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4 }, "model.layers.161.post_attention_layernorm": { "stored_tensors": { "model.layers.161.post_attention_layernorm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "model.layers.161.mlp.up_proj": { "stored_tensors": { "model.layers.161.mlp.up_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.mlp.up_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.161.mlp.up_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.161.mlp.gate_proj": { "stored_tensors": { "model.layers.161.mlp.gate_proj.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.mlp.gate_proj.svh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.161.mlp.gate_proj.trellis": { "shape": [ 1024, 1664, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.layers.161.mlp.down_proj": { "stored_tensors": { "model.layers.161.mlp.down_proj.suh": { "shape": [ 26624 ], "n_bytes": 53248, "dtype": "torch.float16" }, "model.layers.161.mlp.down_proj.svh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "model.layers.161.mlp.down_proj.trellis": { "shape": [ 1664, 1024, 48 ], "n_bytes": 163577856, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3 }, "model.norm": { "stored_tensors": { "model.norm.weight": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" } } }, "lm_head": { "stored_tensors": { "lm_head.suh": { "shape": [ 16384 ], "n_bytes": 32768, "dtype": "torch.float16" }, "lm_head.svh": { "shape": [ 128256 ], "n_bytes": 256512, "dtype": "torch.float16" }, "lm_head.trellis": { "shape": [ 1024, 8016, 96 ], "n_bytes": 1576009728, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 6 } } }