LeanQuant commited on 13 days ago

Commit

444ac37

verified ·

1 Parent(s): 0b0d89c

Add files using upload-large-folder tool

Browse files

Files changed (50) hide show

.gitattributes +1 -0
README.md +68 -0
config.json +46 -0
generation_config.json +6 -0
lm_head.safetensors +3 -0
model.safetensors +3 -0
model_embed_tokens.safetensors +3 -0
model_layers_0.safetensors +3 -0
model_layers_1.safetensors +3 -0
model_layers_10.safetensors +3 -0
model_layers_11.safetensors +3 -0
model_layers_12.safetensors +3 -0
model_layers_13.safetensors +3 -0
model_layers_14.safetensors +3 -0
model_layers_15.safetensors +3 -0
model_layers_16.safetensors +3 -0
model_layers_17.safetensors +3 -0
model_layers_18.safetensors +3 -0
model_layers_19.safetensors +3 -0
model_layers_2.safetensors +3 -0
model_layers_20.safetensors +3 -0
model_layers_21.safetensors +3 -0
model_layers_22.safetensors +3 -0
model_layers_23.safetensors +3 -0
model_layers_24.safetensors +3 -0
model_layers_25.safetensors +3 -0
model_layers_26.safetensors +3 -0
model_layers_27.safetensors +3 -0
model_layers_28.safetensors +3 -0
model_layers_29.safetensors +3 -0
model_layers_3.safetensors +3 -0
model_layers_30.safetensors +3 -0
model_layers_31.safetensors +3 -0
model_layers_32.safetensors +3 -0
model_layers_33.safetensors +3 -0
model_layers_34.safetensors +3 -0
model_layers_35.safetensors +3 -0
model_layers_36.safetensors +3 -0
model_layers_37.safetensors +3 -0
model_layers_38.safetensors +3 -0
model_layers_39.safetensors +3 -0
model_layers_4.safetensors +3 -0
model_layers_5.safetensors +3 -0
model_layers_6.safetensors +3 -0
model_layers_7.safetensors +3 -0
model_layers_8.safetensors +3 -0
model_layers_9.safetensors +3 -0
special_tokens_map.json +23 -0
tokenizer.json +3 -0
tokenizer_config.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,68 @@

+---
+base_model: mistralai/Mistral-Nemo-Instruct-2407
+base_model_relation: quantized
+tags:
+- dfloat11
+- df11
+- lossless compression
+- 70% size, 100% accuracy
+---
+## DFloat11 Compressed Model: `mistralai/Mistral-Nemo-Instruct-2407`
+This is a **losslessly compressed** version of [`mistralai/Mistral-Nemo-Instruct-2407`](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407) using our custom **DFloat11** format. The model size is reduced from **24.50GB to 16.14GB**. The outputs of this compressed model are **bit-for-bit identical** to the original BFloat16 model, while reducing GPU memory consumption by approximately **30%**.
+### 🔍 How It Works
+DFloat11 compresses model weights using **Huffman coding** of BFloat16 exponent bits, combined with **hardware-aware algorithmic designs** that enable efficient on-the-fly decompression directly on the GPU. During inference, the weights remain compressed in GPU memory and are **decompressed just before matrix multiplications**, then **immediately discarded after use** to minimize memory footprint.
+Key benefits:
+* **No CPU decompression or host-device data transfer** -- all operations are handled entirely on the GPU.
+* **Decompression overhead is constant** per forward pass and **independent of batch size**, making DFloat11 increasingly efficient at larger batch sizes.
+* DFloat11 is **much faster than CPU-offloading approaches**, enabling practical deployment in memory-constrained environments.
+* At **batch size = 1**, inference is approximately **2× slower** than the original BF16 model, but the performance gap **narrows significantly** with larger batches.
+* The compression is **fully lossless**, guaranteeing that the model’s outputs are **bit-for-bit identical** to those of the original model.
+### 🔧 How to Use
+1. Install the DFloat11 pip package *(installs the CUDA kernel automatically; requires a CUDA-compatible GPU and PyTorch installed)*:
+    ```bash
+    pip install -U dfloat11[cuda12]
+    # or if you have CUDA version 11:
+    # pip install -U dfloat11[cuda11]
+    ```
+2. To use the DFloat11 model, run the following example code in Python:
+    ```python
+    import torch
+    from dfloat11 import DFloat11Model
+    from transformers import AutoTokenizer
+    model_id = "DFloat11/Mistral-Nemo-Instruct-2407-DF11"
+    model = DFloat11Model.from_pretrained(model_id, device_map="auto")
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.pad_token = tokenizer.eos_token
+    prompt = "Question: What is a binary tree and its applications? Answer:"
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=256,
+            do_sample=True,
+        )
+    print(tokenizer.batch_decode(output, skip_special_tokens=True))
+    ```
+### 📄 Learn More
+* **Paper**: [70% Size, 100% Accuracy: Lossless LLM Compression for Efficient GPU Inference via Dynamic-Length Float](https://arxiv.org/abs/2504.11651)
+* **GitHub**: [https://github.com/LeanModels/DFloat11](https://github.com/LeanModels/DFloat11)
+* **HuggingFace**: [https://huggingface.co/DFloat11](https://huggingface.co/DFloat11)

config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "dfloat11_config": {
+    "bytes_per_thread": 8,
+    "pattern_dict": {
+      "lm_head": [],
+      "model\\.embed_tokens": [],
+      "model\\.layers\\.\\d+": [
+        "self_attn.q_proj",
+        "self_attn.k_proj",
+        "self_attn.v_proj",
+        "self_attn.o_proj",
+        "mlp.gate_proj",
+        "mlp.up_proj",
+        "mlp.down_proj"
+      ]
+    },
+    "threads_per_block": [
+      512
+    ],
+    "version": "0.2.0"
+  },
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 131072,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 40,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
+  "use_cache": true,
+  "vocab_size": 131072
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.51.3"
+}

lm_head.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aebf578c11bf369deca610b915b0645374465e8652a1b19026a5e7880f5c2377
+size 454258018

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d12551f41e3c361d53da174e979e2fc48467da7864c847acc598900aa18ccc5d
+size 10360

model_embed_tokens.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13cd26b6f8b2874959a5bf3377f110172ec245ec1edd7435e8111156219ada00
+size 912813605

model_layers_0.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79148707f46595235a8af251aca5e4e3f746504ec27253789a99e609b4a2082f
+size 375895723

model_layers_1.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:19daad5bd0d4b44afd3a3825ecff2a88c35b885796426800b0b40d034b2b9c18
+size 369567249

model_layers_10.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9d55df79a5b40177b56ffd883f1ce2e22b26571263d807c61da072f8f2bd273
+size 369418903

model_layers_11.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0120324c9e6a76e27f6a4c3e6ffdcd0d6cd249a32df521ba5a785d718c37f89f
+size 369497871

model_layers_12.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad7619905ea57d20293130b139098fd51b875bc9c4604cbaee4affc4125de79b
+size 369454169

model_layers_13.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:436f13b83bc67003d051f68456d348e99d536a095f39f045a6e7a9f9a21f8f4a
+size 369523000

model_layers_14.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2f6c740f5279b9b11e33ca402b71a3f08270897ff81103748a6fa89f8cb4f59
+size 369561528

model_layers_15.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54298c9cf0fa4a547bfdc2d3f60bdbd061acaf5026c991feb7e8f1496909263b
+size 369486887

model_layers_16.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:237e9cf14c052c01ed69577e6076f77136f46672a215afd14a45215994298f9a
+size 369414889

model_layers_17.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:342445da081aa5a62b7ed25063229a1a1c1c3a4a69bb0be4b139104c2ab16ca2
+size 369394336

model_layers_18.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24e5cd2444552108da5d408d30d5a7dbc74e5001f83dab9bf5daa35a858e72ab
+size 369393931

model_layers_19.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:826020881651f4218832d03088aeee505b227365e164808b8a4bb595c8a913a3
+size 369199210

model_layers_2.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae90caae077f7fa665a9f455b4709afcd803c053db885f0173ee0619d3e54d86
+size 369447885

model_layers_20.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09de0b3bc71923957c003dede37f37c0bbdc589d3e27edf6c5fc9a5f8cf5bdc5
+size 368946463

model_layers_21.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d04ac9b831416d9ef12f65db84ed165fb15247c120bbd0af8b87b79290aff451
+size 368830351

model_layers_22.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8f2e61c24e13cf4278cd2e786f4e7a680089c5de1ebf078927bac24872f6d28
+size 368823443

model_layers_23.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88680f9917b46c85b944299634dc0fadc7197457fc8933b6e3e729c0c1cddeb5
+size 368816969

model_layers_24.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:114ae48a4d3e771d29b65e78c5638c0d84c6cc13e85a1f1b52ad6d80c4cd1eb9
+size 368676505

model_layers_25.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:162ac810e8115b13489cdbe40c8325929c4310ae7fc213a26bf58924abf6d314
+size 368679584

model_layers_26.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4762e15ace41cbe85e097284f094c8451908c683eb5d18e6f1dd0f5ba5d104c
+size 368564871

model_layers_27.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e1b9806994d647de25ae18b6634cb1a992d971e48d249c1227f04af582407a9
+size 368511335

model_layers_28.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c729b62b3f53c774daa5f08765af2a28d0104123398e15caf5c2f027dfe23cc
+size 368679193

model_layers_29.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60d86a6972d139202d1de94ed3b26239e094242ba9dcba8a1b04a45cc2077440
+size 368640523

model_layers_3.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:141fb5c03742b072ad81a6fbdf3c58be3e3d23d93f3a17b21f9e1934260bcbc1
+size 369404341

model_layers_30.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3bafcef49ff293f57c6c6abb69ec7477badd36a18a97f7ee2dcf943bbaa06a9
+size 368579303

model_layers_31.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:730696658e019a370d54c3dbeb3d2a1aca8410fc2aac2304bc7049130a583b37
+size 368647561

model_layers_32.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da83b1507cc04b2efabe52362c7152b1c3319478a0d2d9310a44ace46c3936db
+size 368698869

model_layers_33.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:602fe99f097512b85ada2a288f837709704d208e5ca4d668d48d633214e28cf6
+size 368655370

model_layers_34.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:722a75b39810dfa193852cb6af8dccdf30e3e62af381c6cfeae76bb56c80ba9a
+size 368989156

model_layers_35.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:739aaa7a617199f52cade7952834ec97bf20e138d96fc5e54d0dd0f3555db9b5
+size 368813358

model_layers_36.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1cb9091742cfd3906f3633ddcdfbe24b07f3bf8680d1cf154222503961dbb4ac
+size 368915080

model_layers_37.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d0ca216a20596297db9fd59d5c9a659aa0654e2ab311ca2f98b92f818dd53b2
+size 369163802

model_layers_38.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa434eded7c3e7639b2b86040fd244ab76b82c68f241af30d12ecc88aebe749e
+size 369476720

model_layers_39.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60b0c40b6bd11fea89ba1e4566deebe1b6ff63e16b69b583cd5040c8f72727d7
+size 369284242

model_layers_4.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34267eb5750a8478a8968279e2200f62828d37ddfb427be6d72d832693b686aa
+size 369373924

model_layers_5.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20c27cefc07f7b9b3cb5a6ce6631b8b2725d1f05a2f7bf0de300cb269cbdf1f7
+size 369420459

model_layers_6.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b59760962b9340b5858a1bee94cb855f37b572aeb9583f8858a2fecf7dc33d62
+size 369491914

model_layers_7.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:108ea5dd36f7bc00b03589a7073adb59dc6686b77c3c112658868d9303987352
+size 369131759

model_layers_8.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f716e86a73a27914166053890e2771350a9ef3c73b9f3c4b68ad8da5165914fa
+size 369285907

model_layers_9.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc2a50530c557e4cff00690f240fccdb84e1c153f10e6a8ee2e9453432b11cca
+size 369342282

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
+size 17078292

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff