Update README.md
Browse files
README.md
CHANGED
@@ -31,16 +31,6 @@ model.quantize(calibration_dataset, batch_size=2)
|
|
31 |
model.save(quant_path)
|
32 |
```
|
33 |
|
34 |
-
## Running with VLLM
|
35 |
-
|
36 |
-
```bash
|
37 |
-
python -m vllm.entrypoints.openai.api_server \
|
38 |
-
--model /path/to/quantized_model \
|
39 |
-
--quantization gptq \
|
40 |
-
--dtype half \
|
41 |
-
--max-model-len 8192
|
42 |
-
```
|
43 |
-
|
44 |
## License
|
45 |
|
46 |
See LICENSE.txt
|
|
|
31 |
model.save(quant_path)
|
32 |
```
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
## License
|
35 |
|
36 |
See LICENSE.txt
|