Upload folder using huggingface_hub
- README.md +0 -5
- params.json +39 -3
README.md CHANGED
@@ -35,11 +35,6 @@ extra_gated_description: >-
 pipeline_tag: image-text-to-text
 ---
 
-Checkpoint of Mistral-Small-3.1-24B-Instruct-2503 with FP8 per-tensor quantization in the Mistral-format. Please run with vLLM like so:
-```
-vllm serve nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8 --tokenizer_mode mistral --config_format mistral --load_format mistral --tool-call-parser mistral --enable-auto-tool-choice --limit_mm_per_prompt 'image=10'
-```
-
 # Model Card for Mistral-Small-3.1-24B-Instruct-2503
 
 Building upon Mistral Small 3 (2501), Mistral Small 3.1 (2503) **adds state-of-the-art vision understanding** and enhances **long context capabilities up to 128k tokens** without compromising text performance.
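For reference, the removed README note maps onto vLLM's offline Python API as well as `vllm serve`. Below is a minimal sketch, not part of this commit: the repository name and the Mistral-format options are taken from the removed command, while the prompt and sampling values are illustrative assumptions.

```python
# Sketch only: offline-inference equivalent of the removed `vllm serve` line.
# Repo name and the Mistral-format options come from that command; the prompt
# and sampling settings below are illustrative assumptions.
from vllm import LLM, SamplingParams

llm = LLM(
    model="nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8",
    tokenizer_mode="mistral",            # tokenize with mistral-common
    config_format="mistral",             # read params.json rather than config.json
    load_format="mistral",               # load consolidated Mistral-format weights
    limit_mm_per_prompt={"image": 10},   # same image cap as the serve command
)

messages = [{"role": "user", "content": "Summarize FP8 per-tensor quantization in one sentence."}]
outputs = llm.chat(messages, SamplingParams(temperature=0.15, max_tokens=64))
print(outputs[0].outputs[0].text)
```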
params.json CHANGED
@@ -27,7 +27,43 @@
     "image_size": 1540
   },
   "quantization": {
-    "
-
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": true,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": null,
+          "observer_kwargs": {},
+          "strategy": "token",
+          "symmetric": true,
+          "type": "float"
+        },
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "float-quantized",
+    "ignore": [
+      "lm_head",
+      "output"
+    ],
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed"
   }
-}
+}
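The added `quantization` block is a compressed-tensors config: 8-bit float (FP8) weights quantized statically and symmetrically per tensor, FP8 activations quantized dynamically per token, applied to `Linear` modules, with `lm_head` and `output` left unquantized. Below is a small sketch, not part of the commit, that decodes the block into that summary; the `params.json` path is an assumption and should point at a local copy of the checkpoint.

```python
# Sketch only: summarize the quantization config added to params.json above.
# Assumes params.json has been downloaded to the working directory.
import json

with open("params.json") as f:
    quant = json.load(f)["quantization"]

print(f"method: {quant['quant_method']}, format: {quant['format']}")
print(f"unquantized modules: {quant['ignore']}")

for name, group in quant["config_groups"].items():
    w, a = group["weights"], group["input_activations"]
    print(
        f"{name} targets {group['targets']}: "
        f"weights {w['num_bits']}-bit {w['type']}, per-{w['strategy']}, "
        f"{'dynamic' if w['dynamic'] else 'static'}; "
        f"activations {a['num_bits']}-bit {a['type']}, per-{a['strategy']}, "
        f"{'dynamic' if a['dynamic'] else 'static'}"
    )

# For this checkpoint the loop prints, in essence:
#   group_0 targets ['Linear']: weights 8-bit float, per-tensor, static;
#   activations 8-bit float, per-token, dynamic
```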