diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..250d7d4bf92f110c955501c05a259c2bdb782071 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +## DFloat11 Compressed Model: `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B` + +This is a **losslessly compressed** version of [`deepseek-ai/DeepSeek-R1-Distill-Qwen-32B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) using our custom **DFloat11** format. The outputs of this compressed model are **bit-for-bit identical** to the original BFloat16 model, while reducing GPU memory consumption by approximately **30%**. + +### 🔍 How It Works + +DFloat11 compresses model weights using **Huffman coding** of BFloat16 exponent bits, combined with **hardware-aware algorithmic designs** that enable efficient on-the-fly decompression directly on the GPU. During inference, the weights remain compressed in GPU memory and are **decompressed just before matrix multiplications**, then **immediately discarded after use** to minimize memory footprint. + +Key benefits: + +* **No CPU decompression or host-device data transfer** -- all operations are handled entirely on the GPU. +* **Decompression overhead is constant** per forward pass and **independent of batch size**, making DFloat11 increasingly efficient at larger batch sizes. +* DFloat11 is **much faster than CPU-offloading approaches**, enabling practical deployment in memory-constrained environments. 
+* At **batch size = 1**, inference is approximately **2× slower** than the original BF16 model, but the performance gap **narrows significantly** with larger batches. +* The compression is **fully lossless**, guaranteeing that the model’s outputs are **bit-for-bit identical** to those of the original model. + +### 🔧 How to Use + +1. Install the DFloat11 pip package *(installs the CUDA kernel automatically; requires a CUDA-compatible GPU and PyTorch installed)*: + + ```bash + pip install dfloat11[cuda12] + # or if you have CUDA version 11: + # pip install dfloat11[cuda11] + ``` + +2. To use the DFloat11 model, run the following example code in Python: + + ```python + import torch + from dfloat11 import DFloat11Model + from transformers import AutoTokenizer + + model_id = "DFloat11/DeepSeek-R1-Distill-Qwen-32B-DF11" + + model = DFloat11Model.from_pretrained(model_id, device_map="auto") + + tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer.pad_token = tokenizer.eos_token + + prompt = "Question: What is a binary tree and its applications? 
Answer:" + inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device) + + with torch.no_grad(): + output = model.generate( + **inputs, + max_new_tokens=256, + do_sample=True, + ) + + print(tokenizer.batch_decode(output, skip_special_tokens=True)) + ``` + +### 📄 Learn More + +* **Paper**: [70% Size, 100% Accuracy: Lossless LLM Compression for Efficient GPU Inference via Dynamic-Length Float](https://arxiv.org/abs/2504.11651) +* **GitHub**: [https://github.com/LeanModels/DFloat11](https://github.com/LeanModels/DFloat11) +* **HuggingFace**: [https://huggingface.co/DFloat11](https://huggingface.co/DFloat11) \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dada2b6880ca0f3199a6b4bc8abbcb4fb9302560 --- /dev/null +++ b/config.json @@ -0,0 +1,48 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dfloat11_config": { + "bytes_per_thread": 8, + "pattern_dict": { + "lm_head": [], + "model.embed_tokens": [], + "model.layers.\\d+": [ + "self_attn.q_proj", + "self_attn.k_proj", + "self_attn.v_proj", + "self_attn.o_proj", + "mlp.gate_proj", + "mlp.up_proj", + "mlp.down_proj" + ] + }, + "threads_per_block": [ + 512 + ], + "version": "0.2.0" + }, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 27648, + "max_position_embeddings": 131072, + "max_window_layers": 64, + "model_type": "qwen2", + "num_attention_heads": 40, + "num_hidden_layers": 64, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..92878bd36a6f22c0ad39d3eecd6839be7eeab4ab --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 151646, + "do_sample": true, + "eos_token_id": 151643, + "temperature": 0.6, + "top_p": 0.95, + "transformers_version": "4.51.3" +} diff --git a/lm_head.safetensors b/lm_head.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..487421fe90ce27a74e2f5ac2d2dfece9eeddcd0b --- /dev/null +++ b/lm_head.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262f0cdad054005a32309fe1b157f7b426a07ae903ebf7b736813de8d3ee2003 +size 1056885536 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8145cca7375342f0d36125b7ba109fad643efe67 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7372e0345e6413aeb3caed3266d35640bbb13159a20f9349bafd50c3c2ee1cb1 +size 10360 diff --git a/model_embed_tokens.safetensors b/model_embed_tokens.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86d2f738eb24e35ccbcccf50e8b83d91d947b2a7 --- /dev/null +++ b/model_embed_tokens.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d754feea1cfb7eb41cbd1bcd18055f8313dc53c7fad0d36bea999143711ac1 +size 1073106128 diff --git a/model_layers_0.safetensors b/model_layers_0.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5ccf1b93b3bd1f2b13ffd4edff5a646c1efcf34 --- /dev/null +++ b/model_layers_0.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e556d9a487202fc83e1eaff3a5f88abbb36aebcade45c708c9d9f737ffdd5f +size 662441978 diff --git a/model_layers_1.safetensors b/model_layers_1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9755b645af350504dfab56174e2086c9fa349d4 --- /dev/null +++ 
b/model_layers_1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3a2af66877831e05bf8ee4c0f58d11c795051c34482e79704fff5fe964d132c +size 725166597 diff --git a/model_layers_10.safetensors b/model_layers_10.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd6c72b62f936e7a5d13cbc8bd0cb7ea1bda0f58 --- /dev/null +++ b/model_layers_10.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4bed358e0658a03c157ba5ced51ecb132c101106a09f942177035c425c3c5e +size 659979946 diff --git a/model_layers_11.safetensors b/model_layers_11.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d90a1dacb2c9a26b4c923d1107fa99ead42628f5 --- /dev/null +++ b/model_layers_11.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228cd95e46bd52cfbfae70e04933ed9d7dc41a82888dc0f47199429121b386ba +size 659918803 diff --git a/model_layers_12.safetensors b/model_layers_12.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..005547bbae084809d336e463935cb5f2ddd215e1 --- /dev/null +++ b/model_layers_12.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ced64d84d3d6918ec19e6152e5e02e7658d913e6af1ec6907cf2524f0ee0bc +size 659389766 diff --git a/model_layers_13.safetensors b/model_layers_13.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..688e0fdf2efb869795b8acec58071627298117af --- /dev/null +++ b/model_layers_13.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f24235a9337b972a840f21defe923b903c5d4c2975c8085ef3cb9c36034e8ec7 +size 659693888 diff --git a/model_layers_14.safetensors b/model_layers_14.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d218b7dd63fc16b3e9a1392d00cd7101dad8ecba --- /dev/null +++ b/model_layers_14.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:63d8de556cbea7c1dbaa89ffde95581537182afba37d3038cdb7d2421f18f647 +size 659693904 diff --git a/model_layers_15.safetensors b/model_layers_15.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a00a98b5600dcb2f415354722536e413765c95b --- /dev/null +++ b/model_layers_15.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b46adbc65842166dcdbd3ab5438a329640ff71ca7be7a8a492961e10113c50b6 +size 660158903 diff --git a/model_layers_16.safetensors b/model_layers_16.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ca2d973600b3e9c474c7b476c38a872e45c8390 --- /dev/null +++ b/model_layers_16.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ea785666a9f7e9ff8e91dfe6ffdba58fea79f5cb7593905c908c171ecaf9d3 +size 659882224 diff --git a/model_layers_17.safetensors b/model_layers_17.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2db4227bc6114857f0cf4edb00e6e78ec6af9ef2 --- /dev/null +++ b/model_layers_17.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc546e926d8063087dbaa6da4dba3745b0aa30d441050cde8059b7427596a76 +size 659742874 diff --git a/model_layers_18.safetensors b/model_layers_18.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b6ec25213dc30bd680c1a4bdcf53bd6a3084d49 --- /dev/null +++ b/model_layers_18.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78c4bce9f8293d5882b3761ac222cdd7ffb370ca66fbdf66988c80a0bab6d24 +size 659930747 diff --git a/model_layers_19.safetensors b/model_layers_19.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfb8978f9dca45f0e44d166c15e855bc4a49520b --- /dev/null +++ b/model_layers_19.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d914f4114c2c54493cf69414996bcb071f6427f4937d80109b2ca810e79906 +size 
660045727 diff --git a/model_layers_2.safetensors b/model_layers_2.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36e07cf52762119ae046b012851a72e296331017 --- /dev/null +++ b/model_layers_2.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f00ad02e65119dead518589a1e2ac382470d906980dd58305ff3f5be3b3d001e +size 720484054 diff --git a/model_layers_20.safetensors b/model_layers_20.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75d97c10ab48eb321f45363948c584e46d651242 --- /dev/null +++ b/model_layers_20.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2f59693b89676df3d8e07556651cc93004c0ac81bd6233c94ba8fc1765e492 +size 660008541 diff --git a/model_layers_21.safetensors b/model_layers_21.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de74661881c93240b125a13893a4cac61143d6e4 --- /dev/null +++ b/model_layers_21.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9a00c2a3a819c691b61a3b6344b075ad2625ec0d8e932560a5e33f91bb21fc +size 660025414 diff --git a/model_layers_22.safetensors b/model_layers_22.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddd19961340eb05325275615cb8cbdaffc6a665c --- /dev/null +++ b/model_layers_22.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27431a21b432735837d72d8ea0748b2e923d92e1ef2cfb343574a34230d92011 +size 660041687 diff --git a/model_layers_23.safetensors b/model_layers_23.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c60d09676455fbc4b4d70ecb13f53b7739af30c --- /dev/null +++ b/model_layers_23.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0a23c6d94ba63a1f33f6f5d461c50720c46bc5c4f1213736db9af57386b3a57 +size 660102695 diff --git a/model_layers_24.safetensors b/model_layers_24.safetensors new file 
mode 100644 index 0000000000000000000000000000000000000000..5e1429f3b5fed160179057febb0b2e6bc60117d9 --- /dev/null +++ b/model_layers_24.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b52a4c49ae539bfa7309a56904b088b99a65717bed4662da1b21e5d71509f49 +size 659879208 diff --git a/model_layers_25.safetensors b/model_layers_25.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35444557efd8c23bb433314abb232c1d0e44f28f --- /dev/null +++ b/model_layers_25.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4631ddf9a3a228ffde06d53798672e25694e19b68b64848cfba5795dbbe92e72 +size 659860281 diff --git a/model_layers_26.safetensors b/model_layers_26.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29f8176c4d0d2727a0d93ccc8928b93424d61d02 --- /dev/null +++ b/model_layers_26.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd35a238487ac3a1c05f3afb84ef9abd5f4785103a259fbd8af28aad4420c679 +size 659937778 diff --git a/model_layers_27.safetensors b/model_layers_27.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e673691aeb2c316cb7e2ad825f743cecb3cacfa3 --- /dev/null +++ b/model_layers_27.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01bf4b3141223368471cea3580de1fced077ac5331ba91b18328b8841e71ff37 +size 660095500 diff --git a/model_layers_28.safetensors b/model_layers_28.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23e1ebd6da649e1ebc42e70045507870033ea1b6 --- /dev/null +++ b/model_layers_28.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb52dc9d522c9833b67c0874e9dbf6dc272a3bd808a05ec3307a29a34144152a +size 660349657 diff --git a/model_layers_29.safetensors b/model_layers_29.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..ef9c8193db1b01c059bd181f4ba04021e5c2b231 --- /dev/null +++ b/model_layers_29.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506f46a23fe2f36ffe27850a3bb927403b5c5c5d4a6d2f8978d5ed65cc66be20 +size 660238782 diff --git a/model_layers_3.safetensors b/model_layers_3.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e889a1b7cdfd479529b11c80b6afd885c4c2187 --- /dev/null +++ b/model_layers_3.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2eedd33b98f5da3123d346e92dfc9e25ea72b4709419a6f4459b4643a416b32 +size 716178698 diff --git a/model_layers_30.safetensors b/model_layers_30.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f095e27a2d4db298f31e85f103047429cf28040 --- /dev/null +++ b/model_layers_30.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba1b2e12aea9e57c8fa96260082383573acbcd283a7d00b73bdc1f2786f89990 +size 660154409 diff --git a/model_layers_31.safetensors b/model_layers_31.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51a511a0b80ec8829f5d5f5670ab9e95f68971ae --- /dev/null +++ b/model_layers_31.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05718dfa286672dc4b2b71f7b040c26e5d33326ad4350683f2253d03fcf9ed2b +size 660085711 diff --git a/model_layers_32.safetensors b/model_layers_32.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0638e76d06e93b3490786c5aa75bd321c627d4a --- /dev/null +++ b/model_layers_32.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ecb318a215c56dade952ebf4c513bf17d640508e8a9bf4bd6161041a895591b +size 660131842 diff --git a/model_layers_33.safetensors b/model_layers_33.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..782599b11ce6e77470de14ac1703f41186b5c192 --- 
/dev/null +++ b/model_layers_33.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f5d38b55219e05bdd43a13eaf0c07321c4541554a7d0008b908773dadd02690 +size 659999114 diff --git a/model_layers_34.safetensors b/model_layers_34.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7aba7197a429c256db8c6818b83312ce7494ee5 --- /dev/null +++ b/model_layers_34.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a661ad479392473b13e4733fbe0f5389646ad7a41814da558f97e3e5747e69ff +size 660178412 diff --git a/model_layers_35.safetensors b/model_layers_35.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20f8700732c596e7813552b8fddb1dd8c4372304 --- /dev/null +++ b/model_layers_35.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0d3a8d010aaaea9fc638af2939e807adc7a7b72be8fe7f70f51fe7ade0d0ec +size 660284891 diff --git a/model_layers_36.safetensors b/model_layers_36.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fe303283838aae12531013381568f6df1cd7066 --- /dev/null +++ b/model_layers_36.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33a0057ad0e1d13732a8758f50c30610c45673e0af91a505a9a4d616b98d35d1 +size 660306945 diff --git a/model_layers_37.safetensors b/model_layers_37.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10a34927874a3d643ae960effcda38444bb28936 --- /dev/null +++ b/model_layers_37.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5326682e243820a870ac9e28303b01c3fdfc5363c49d5bf334b50c5fd115967 +size 660254993 diff --git a/model_layers_38.safetensors b/model_layers_38.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b87bad72e29fbd7d8db00d7ceed4ac89aa169a5 --- /dev/null +++ b/model_layers_38.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:876242355e5f0ce8cdedd1256f581b18811924c3f96fa776529755b05cc2ca3d +size 660325297 diff --git a/model_layers_39.safetensors b/model_layers_39.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca6823d53502bab48192f74038a1412527cf10d3 --- /dev/null +++ b/model_layers_39.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36de3203a71c654b83266fff9e24d6a455ad22c772d7bbce21821f7108d80251 +size 660467243 diff --git a/model_layers_4.safetensors b/model_layers_4.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85a59647b994920f59c676f9a0400bb8212c6c37 --- /dev/null +++ b/model_layers_4.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bad41f176b4e3b871369a0a03bd58bc89d0bcb6bccb4c4fa2a06011df0709cf +size 714368386 diff --git a/model_layers_40.safetensors b/model_layers_40.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd332af8658397ab79462ee73111129f7f12fda4 --- /dev/null +++ b/model_layers_40.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60bb8269125282554b424d5a8061254d423d032ebe64e4b32c3d75e389bfebff +size 660206507 diff --git a/model_layers_41.safetensors b/model_layers_41.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f4e608ffa79e2b6199f9225568194f8b8b4c3a3 --- /dev/null +++ b/model_layers_41.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64704696897292329cf325060769a67c4d159cac3c98b8c186f513f5fb7ed8ab +size 660317617 diff --git a/model_layers_42.safetensors b/model_layers_42.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b00c4dc35919afdccc3a48ead8e7f7cfccf63eea --- /dev/null +++ b/model_layers_42.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:aa780b39fd43aac49e6e728c64a183394bb171a76e5466e658b551c9f267b92b +size 660258726 diff --git a/model_layers_43.safetensors b/model_layers_43.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24d5b9ac8725271e9793e916f23f7d730886f0a2 --- /dev/null +++ b/model_layers_43.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1075e1d05911b07102d9b931046b21b67234560302a378cbe78310b2d4a7ade0 +size 660384825 diff --git a/model_layers_44.safetensors b/model_layers_44.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f096705454eef187ee393651e142c6ba081b920 --- /dev/null +++ b/model_layers_44.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc1ff11c68214120aa1b62b261af4befa3a82aa86b2613f1b68be59afdf1500c +size 660669818 diff --git a/model_layers_45.safetensors b/model_layers_45.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..437374bd0f90279e2153aeab67f07d92ca436454 --- /dev/null +++ b/model_layers_45.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d442edc5540a500225a803d9c0d72ec2c88ef2c6dbb82153ac70cdd74e82b5b +size 660444923 diff --git a/model_layers_46.safetensors b/model_layers_46.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab070903496616ce109e679f6b33afd09a91394e --- /dev/null +++ b/model_layers_46.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ef6022314f23eb41b92cf1c1904ff9f89af535e82b8b573e0fa2dae81186ea +size 660008546 diff --git a/model_layers_47.safetensors b/model_layers_47.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b8252cbfa26f5564caa001b19bc3fe2dd9b2171 --- /dev/null +++ b/model_layers_47.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640485599adf9fcda532463cc0beb916fcacd7e38b3e65bd5e365fd1fda76f61 +size 659742991 
diff --git a/model_layers_48.safetensors b/model_layers_48.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d724c8ef231480b82250cc9ac0e950994235628 --- /dev/null +++ b/model_layers_48.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e780fca32868f7e40b41e6bf0f78936cbfd4164270ce87baa4da7118d5b65698 +size 659682009 diff --git a/model_layers_49.safetensors b/model_layers_49.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ecf94f6aae09813b216a068d40d8f8eb46cdde5 --- /dev/null +++ b/model_layers_49.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d60ecf04d669105ab758905b543ffd416002dbecbe26d7958c9cece30558572 +size 659723254 diff --git a/model_layers_5.safetensors b/model_layers_5.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb8c13bc62f37edfed5516f82fa6a13b3fd06464 --- /dev/null +++ b/model_layers_5.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14cbfe033bc5c241e6f74f0eee685c34ef9fc87d49db28e6d1f287b4948e9c77 +size 700106707 diff --git a/model_layers_50.safetensors b/model_layers_50.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e04f55cc99b237f2da68dfc7ff0f42ca772465c3 --- /dev/null +++ b/model_layers_50.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0b61ff3f93251d8e99b2748ac593a79a6ed40a84579af29c4eda9ac874aa71d +size 659639358 diff --git a/model_layers_51.safetensors b/model_layers_51.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60616b3c84ebbf0b9ed578c4417b2c80a6ae0624 --- /dev/null +++ b/model_layers_51.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228bdf4bda8ded1ef0846a78c4028f77ce8b9122d60b6c4a054f783b3e9584e7 +size 659397344 diff --git a/model_layers_52.safetensors b/model_layers_52.safetensors new file mode 100644 
index 0000000000000000000000000000000000000000..0f3a76107dcc6b86dde166b34b0008697064895a --- /dev/null +++ b/model_layers_52.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e401373ba7dc57a628b5f5f200806d8ee504c08cf3fc2862c93b67311ca316 +size 659644440 diff --git a/model_layers_53.safetensors b/model_layers_53.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a10bfd88de685ee32fcfd284a5a7a1449583d89 --- /dev/null +++ b/model_layers_53.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a1992b791cf11b58acfb96553a2e5653d67c736a26c69120580bcce092f27e +size 659118512 diff --git a/model_layers_54.safetensors b/model_layers_54.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97c7712dcc258931366cfbe59becf93dca00fcda --- /dev/null +++ b/model_layers_54.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaba1b42d1cf868308f0d61828353164d7e468e25bd39db0b87865e8bf86337e +size 658936286 diff --git a/model_layers_55.safetensors b/model_layers_55.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b78248d92a1b98ee8002027c287cd8a61462bc2 --- /dev/null +++ b/model_layers_55.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9adc2177c97fca7bac255d49cf6925357c89eb5c37ad07783f07a74b58578c72 +size 658960476 diff --git a/model_layers_56.safetensors b/model_layers_56.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae4338e596d87ba644fdc5e6bfdfd2fdf5e6b29b --- /dev/null +++ b/model_layers_56.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38f9c48299ebed1d6ab85cafab3fc0d052ce943584128ff499b2ef50fc33404 +size 659093717 diff --git a/model_layers_57.safetensors b/model_layers_57.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..947fb726bd20b82de0a3691ede8c1237e98ecb0f 
--- /dev/null +++ b/model_layers_57.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81319250d720ef1d22de5274b241682dceea615269816e6a735b1519a7099392 +size 659018974 diff --git a/model_layers_58.safetensors b/model_layers_58.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdead87960570a4c856c7f63bb908de5d529da53 --- /dev/null +++ b/model_layers_58.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67db22c8ae03e1b084c3c077b7654bb2dbeb3529594ad89b9c89916188d5902 +size 659020735 diff --git a/model_layers_59.safetensors b/model_layers_59.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed757e60c8e585ac8ffdc7bd92af9aa79457311c --- /dev/null +++ b/model_layers_59.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b1177eb4c8ab2ce12b9fc487ee109761fb68e7c8a5df44c1862dbe1e11f034 +size 659710456 diff --git a/model_layers_6.safetensors b/model_layers_6.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee63d6e5269e2a2e3e6e057036f11a54968bdc94 --- /dev/null +++ b/model_layers_6.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6890e2bac5f1372006d446e453eaba2d42b2d063b824d12bf5f8a77c88b23050 +size 690707179 diff --git a/model_layers_60.safetensors b/model_layers_60.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20d8e9b593bf0a98f71b3b9bb0c77e04464e3a47 --- /dev/null +++ b/model_layers_60.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28b402b1f61e72bd069bff4a5755c64d62a159e61830d8f7fce8fc29438a2b2 +size 660075138 diff --git a/model_layers_61.safetensors b/model_layers_61.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..298f9afbed80a2cfa28e2b229dffe7055c79061d --- /dev/null +++ b/model_layers_61.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:25fb057fd4604481754660acbf52f98beea18c242b14e4262d12590f26b64e99 +size 660235999 diff --git a/model_layers_62.safetensors b/model_layers_62.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5558899dcc07a7573718f7431fbcb0fc91ef235 --- /dev/null +++ b/model_layers_62.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092b11a0de1c0302328327fc2e7379ad24ecc4a94b875cbc24ee9521f90f7711 +size 660708014 diff --git a/model_layers_63.safetensors b/model_layers_63.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f58db27078fbf61552e51985c54a38c0fd10f5bf --- /dev/null +++ b/model_layers_63.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08ec374290b32924c159cee77c888848fbc65f158d3aa7e2e8f5a3d80bd53e7 +size 660542388 diff --git a/model_layers_7.safetensors b/model_layers_7.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf9fa8ffb4448f07eaddd7adaae8b65bfd6f31c7 --- /dev/null +++ b/model_layers_7.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fc87986be0a56c637ccb68eddce8d767e35a948c6359650a7656e53759e6cc +size 664073959 diff --git a/model_layers_8.safetensors b/model_layers_8.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa2d0e15f61e5fb5b8a66d0512671bc5d3001d81 --- /dev/null +++ b/model_layers_8.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93ce5b595714c7208d1bed449eec7819d8f5c093b9d482fc128206cb8b1c1cab +size 660830363 diff --git a/model_layers_9.safetensors b/model_layers_9.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6441b6c9bd2d12bc1d7a8e8fb7526a1b23c4ac01 --- /dev/null +++ b/model_layers_9.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ac8b8f69289819266b94d1236aecedfff0fe082fab395ce9fb92be0f7d804f54 +size 660202273 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef6e98c3e0446cad00c5e6fb6bf2f5bbaf2eb0bd --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,195 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, +
"rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "<think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "</think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "<tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "</tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, +
"151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- 
endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +}