Update README.md
Browse files
README.md
CHANGED
@@ -655,21 +655,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
655 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w8a8</td>
|
656 |
<td>1.70</td>
|
657 |
<td>0.8</td>
|
658 |
-
<td>
|
659 |
<td>1.1</td>
|
660 |
-
<td>
|
661 |
<td>1.3</td>
|
662 |
-
<td>
|
663 |
</tr>
|
664 |
<tr>
|
665 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
666 |
<td>1.48</td>
|
667 |
<td>0.5</td>
|
668 |
-
<td>
|
669 |
<td>1.0</td>
|
670 |
-
<td>
|
671 |
<td>1.4</td>
|
672 |
-
<td>
|
673 |
</tr>
|
674 |
<tr>
|
675 |
<th rowspan="3" valign="top">H100x4</th>
|
@@ -686,21 +686,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
686 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-Dynamic</td>
|
687 |
<td>1.61</td>
|
688 |
<td>1.7</td>
|
689 |
-
<td>
|
690 |
<td>2.6</td>
|
691 |
-
<td>
|
692 |
<td>3.2</td>
|
693 |
-
<td>
|
694 |
</tr>
|
695 |
<tr>
|
696 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
697 |
<td>1.33</td>
|
698 |
<td>1.4</td>
|
699 |
-
<td>
|
700 |
<td>2.2</td>
|
701 |
-
<td>
|
702 |
<td>2.7</td>
|
703 |
-
<td>
|
704 |
</tr>
|
705 |
</tbody>
|
706 |
</table>
|
|
|
655 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w8a8</td>
|
656 |
<td>1.70</td>
|
657 |
<td>0.8</td>
|
658 |
+
<td>383</td>
|
659 |
<td>1.1</td>
|
660 |
+
<td>571</td>
|
661 |
<td>1.3</td>
|
662 |
+
<td>674</td>
|
663 |
</tr>
|
664 |
<tr>
|
665 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
666 |
<td>1.48</td>
|
667 |
<td>0.5</td>
|
668 |
+
<td>276</td>
|
669 |
<td>1.0</td>
|
670 |
+
<td>505</td>
|
671 |
<td>1.4</td>
|
672 |
+
<td>680</td>
|
673 |
</tr>
|
674 |
<tr>
|
675 |
<th rowspan="3" valign="top">H100x4</th>
|
|
|
686 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-Dynamic</td>
|
687 |
<td>1.61</td>
|
688 |
<td>1.7</td>
|
689 |
+
<td>467</td>
|
690 |
<td>2.6</td>
|
691 |
+
<td>726</td>
|
692 |
<td>3.2</td>
|
693 |
+
<td>908</td>
|
694 |
</tr>
|
695 |
<tr>
|
696 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
697 |
<td>1.33</td>
|
698 |
<td>1.4</td>
|
699 |
+
<td>393</td>
|
700 |
<td>2.2</td>
|
701 |
+
<td>726</td>
|
702 |
<td>2.7</td>
|
703 |
+
<td>764</td>
|
704 |
</tr>
|
705 |
</tbody>
|
706 |
</table>
|