sandman4 committed on
Commit
00dbfaf
·
1 Parent(s): df47298

add README.md

Browse files
Files changed (1) hide show
  1. README.md +30 -3
README.md CHANGED
@@ -1,3 +1,30 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Qwen3-32B GPTQ 4-bit
2
+
3
+ Quantized with [GPTQModel](https://github.com/ModelCloud/GPTQModel) using the script below.
4
+
5
+
6
+ ```python3
7
+ from datasets import load_dataset
8
+ from gptqmodel import GPTQModel, QuantizeConfig
9
+
10
+ import sys
11
+
12
+ model_id = sys.argv[1]
13
+ print(model_id)
14
+ quant_path = "quantized_model"
15
+
16
+ calibration_dataset = load_dataset(
17
+ "allenai/c4",
18
+ data_files="en/c4-train.00001-of-01024.json.gz",
19
+ split="train"
20
+ ).select(range(1024))["text"]
21
+
22
+ quant_config = QuantizeConfig(bits=4, group_size=128)
23
+
24
+ model = GPTQModel.load(model_id, quant_config)
25
+
26
+ # increase `batch_size` to match gpu/vram specs to speed up quantization
27
+ model.quantize(calibration_dataset, batch_size=2)
28
+
29
+ model.save(quant_path)
30
+ ```