mgoin committed on
Commit
618beec
·
verified ·
1 Parent(s): 67ae38e

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +0 -5
  2. params.json +39 -3
README.md CHANGED
@@ -35,11 +35,6 @@ extra_gated_description: >-
35
  pipeline_tag: image-text-to-text
36
  ---
37
 
38
- Checkpoint of Mistral-Small-3.1-24B-Instruct-2503 with FP8 per-tensor quantization in the Mistral-format. Please run with vLLM like so:
39
- ```
40
- vllm serve nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8 --tokenizer_mode mistral --config_format mistral --load_format mistral --tool-call-parser mistral --enable-auto-tool-choice --limit_mm_per_prompt 'image=10'
41
- ```
42
-
43
  # Model Card for Mistral-Small-3.1-24B-Instruct-2503
44
 
45
  Building upon Mistral Small 3 (2501), Mistral Small 3.1 (2503) **adds state-of-the-art vision understanding** and enhances **long context capabilities up to 128k tokens** without compromising text performance.
 
35
  pipeline_tag: image-text-to-text
36
  ---
37
 
 
 
 
 
 
38
  # Model Card for Mistral-Small-3.1-24B-Instruct-2503
39
 
40
  Building upon Mistral Small 3 (2501), Mistral Small 3.1 (2503) **adds state-of-the-art vision understanding** and enhances **long context capabilities up to 128k tokens** without compromising text performance.
params.json CHANGED
@@ -27,7 +27,43 @@
27
  "image_size": 1540
28
  },
29
  "quantization": {
30
- "qformat_weight": "fp8_e4m3",
31
- "activation_scheme": "dynamic"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
- }
 
27
  "image_size": 1540
28
  },
29
  "quantization": {
30
+ "config_groups": {
31
+ "group_0": {
32
+ "input_activations": {
33
+ "actorder": null,
34
+ "block_structure": null,
35
+ "dynamic": true,
36
+ "group_size": null,
37
+ "num_bits": 8,
38
+ "observer": null,
39
+ "observer_kwargs": {},
40
+ "strategy": "token",
41
+ "symmetric": true,
42
+ "type": "float"
43
+ },
44
+ "targets": [
45
+ "Linear"
46
+ ],
47
+ "weights": {
48
+ "actorder": null,
49
+ "block_structure": null,
50
+ "dynamic": false,
51
+ "group_size": null,
52
+ "num_bits": 8,
53
+ "observer": "minmax",
54
+ "observer_kwargs": {},
55
+ "strategy": "tensor",
56
+ "symmetric": true,
57
+ "type": "float"
58
+ }
59
+ }
60
+ },
61
+ "format": "float-quantized",
62
+ "ignore": [
63
+ "lm_head",
64
+ "output"
65
+ ],
66
+ "quant_method": "compressed-tensors",
67
+ "quantization_status": "compressed"
68
  }
69
+ }