Old-Fisherman committed
Commit b922c99 · verified · 1 parent: c9472c6

Update convert_mod.py

Files changed (1):
  1. convert_mod.py +261 -261
convert_mod.py CHANGED
@@ -1,261 +1,261 @@
- # (c) City96 || Apache-2.0 (apache.org/licenses/LICENSE-2.0)
- import os
- import torch
- import gguf # This needs to be the llama.cpp one specifically!
- import argparse
- from tqdm import tqdm
-
- from safetensors.torch import load_file
-
- QUANTIZATION_THRESHOLD = 1024
- REARRANGE_THRESHOLD = 512
- MAX_TENSOR_NAME_LENGTH = 127
-
- class ModelTemplate:
-     arch = "invalid" # string describing architecture
-     shape_fix = False # whether to reshape tensors
-     keys_detect = [] # list of lists to match in state dict
-     keys_banned = [] # list of keys that should mark model as invalid for conversion
-
- class ModelFlux(ModelTemplate):
-     arch = "flux"
-     keys_detect = [
-         ("transformer_blocks.0.attn.norm_added_k.weight",),
-         ("double_blocks.0.img_attn.proj.weight",),
-     ]
-     keys_banned = ["transformer_blocks.0.attn.norm_added_k.weight",]
-
- class ModelSD3(ModelTemplate):
-     arch = "sd3"
-     keys_detect = [
-         ("transformer_blocks.0.attn.add_q_proj.weight",),
-         ("joint_blocks.0.x_block.attn.qkv.weight",),
-     ]
-     keys_banned = ["transformer_blocks.0.attn.add_q_proj.weight",]
-
- class ModelAura(ModelTemplate):
-     arch = "aura"
-     keys_detect = [
-         ("double_layers.3.modX.1.weight",),
-         ("joint_transformer_blocks.3.ff_context.out_projection.weight",),
-     ]
-     keys_banned = ["joint_transformer_blocks.3.ff_context.out_projection.weight",]
-
- class ModelLTXV(ModelTemplate):
-     arch = "ltxv"
-     keys_detect = [
-         (
-             "adaln_single.emb.timestep_embedder.linear_2.weight",
-             "transformer_blocks.27.scale_shift_table",
-             "caption_projection.linear_2.weight",
-         )
-     ]
-
- class ModelSDXL(ModelTemplate):
-     arch = "sdxl"
-     shape_fix = True
-     keys_detect = [
-         ("down_blocks.0.downsamplers.0.conv.weight", "add_embedding.linear_1.weight",),
-         (
-             "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight",
-             "output_blocks.2.2.conv.weight", "output_blocks.5.2.conv.weight",
-         ), # Non-diffusers
-         ("label_emb.0.0.weight",),
-     ]
-
- class ModelSD1(ModelTemplate):
-     arch = "sd1"
-     shape_fix = True
-     keys_detect = [
-         ("down_blocks.0.downsamplers.0.conv.weight",),
-         (
-             "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight", "input_blocks.9.0.op.weight",
-             "output_blocks.2.1.conv.weight", "output_blocks.5.2.conv.weight", "output_blocks.8.2.conv.weight"
-         ), # Non-diffusers
-     ]
-
- class ModelClipG(ModelTemplate):
-     arch = "clip_g"
-     keys_detect = [
-         ("conditioner.embedders.1.model.ln_final.bias",), # Final layer normalization bias
-         (
-             "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight", # Attention input projection weight
-             "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight", # Attention input projection weight for another block
-         ),
-     ]
-     keys_banned = [] # Add any banned keys if necessary
-
- # The architectures are checked in order and the first successful match terminates the search.
- arch_list = [ModelFlux, ModelSD3, ModelAura, ModelLTXV, ModelSDXL, ModelSD1, ModelClipG]
-
- def is_model_arch(model, state_dict):
-     # check if model is correct
-     matched = False
-     invalid = False
-     for match_list in model.keys_detect:
-         print(f"Checking match list: {match_list}")
-         if all(key in state_dict for key in match_list):
-             print(f"Match found for {match_list}")
-             matched = True
-             invalid = any(key in state_dict for key in model.keys_banned)
-             break
-     assert not invalid, "Model architecture not allowed for conversion! (i.e. reference VS diffusers format)"
-     return matched
-
- def detect_arch(state_dict):
-     model_arch = None
-     for arch in arch_list:
-         if is_model_arch(arch, state_dict):
-             model_arch = arch
-             break
-     assert model_arch is not None, "Unknown model architecture!"
-     return model_arch
-
- def parse_args():
-     parser = argparse.ArgumentParser(description="Generate F16 GGUF files from single UNET")
-     parser.add_argument("--src", required=True, help="Source model ckpt file.")
-     parser.add_argument("--dst", help="Output unet gguf file.")
-     args = parser.parse_args()
-
-     if not os.path.isfile(args.src):
-         parser.error("No input provided!")
-
-     return args
-
- def load_state_dict(path):
-     if any(path.endswith(x) for x in [".ckpt", ".pt", ".bin", ".pth"]):
-         state_dict = torch.load(path, map_location="cpu", weights_only=True)
-         state_dict = state_dict.get("model", state_dict)
-     else:
-         state_dict = load_file(path)
-
-     # only keep unet with no prefix!
-     prefix = None
-     for pfx in ["model.diffusion_model.", "model."]:
-         if any([x.startswith(pfx) for x in state_dict.keys()]):
-             prefix = pfx
-             break
-
-     sd = {}
-     for k, v in state_dict.items():
-         if prefix and prefix not in k:
-             continue
-         if prefix:
-             k = k.replace(prefix, "")
-         sd[k] = v
-
-     return sd
-
- def load_model(path):
-     state_dict = load_state_dict(path)
-     model_arch = detect_arch(state_dict)
-     print(f"* Architecture detected from input: {model_arch.arch}")
-     writer = gguf.GGUFWriter(path=None, arch=model_arch.arch)
-     return (writer, state_dict, model_arch)
-
- def handle_tensors(args, writer, state_dict, model_arch):
-     name_lengths = tuple(sorted(
-         ((key, len(key)) for key in state_dict.keys()),
-         key=lambda item: item[1],
-         reverse=True,
-     ))
-     if not name_lengths:
-         return
-     max_name_len = name_lengths[0][1]
-     if max_name_len > MAX_TENSOR_NAME_LENGTH:
-         bad_list = ", ".join(f"{key!r} ({namelen})" for key, namelen in name_lengths if namelen > MAX_TENSOR_NAME_LENGTH)
-         raise ValueError(f"Can only handle tensor names up to {MAX_TENSOR_NAME_LENGTH} characters. Tensors exceeding the limit: {bad_list}")
-     for key, data in tqdm(state_dict.items()):
-         old_dtype = data.dtype
-
-         if data.dtype == torch.bfloat16:
-             data = data.to(torch.float32).numpy()
-         # this is so we don't break torch 2.0.X
-         elif data.dtype in [getattr(torch, "float8_e4m3fn", "_invalid"), getattr(torch, "float8_e5m2", "_invalid")]:
-             data = data.to(torch.float16).numpy()
-         else:
-             data = data.numpy()
-
-         n_dims = len(data.shape)
-         data_shape = data.shape
-         data_qtype = getattr(
-             gguf.GGMLQuantizationType,
-             "BF16" if old_dtype == torch.bfloat16 else "F16"
-         )
-
-         # get number of parameters (AKA elements) in this tensor
-         n_params = 1
-         for dim_size in data_shape:
-             n_params *= dim_size
-
-         # keys to keep as max precision
-         blacklist = {
-             "time_embedding.",
-             "add_embedding.",
-             "time_in.",
-             "txt_in.",
-             "vector_in.",
-             "img_in.",
-             "guidance_in.",
-             "final_layer.",
-         }
-
-         if old_dtype in (torch.float32, torch.bfloat16):
-             if n_dims == 1:
-                 # one-dimensional tensors should be kept in F32
-                 # also speeds up inference due to not dequantizing
-                 data_qtype = gguf.GGMLQuantizationType.F32
-
-             elif n_params <= QUANTIZATION_THRESHOLD:
-                 # very small tensors
-                 data_qtype = gguf.GGMLQuantizationType.F32
-
-             elif ".weight" in key and any(x in key for x in blacklist):
-                 data_qtype = gguf.GGMLQuantizationType.F32
-
-         if (model_arch.shape_fix # NEVER reshape for models such as flux
-             and n_dims > 1 # Skip one-dimensional tensors
-             and n_params >= REARRANGE_THRESHOLD # Only rearrange tensors meeting the size requirement
-             and (n_params / 256).is_integer() # Rearranging only makes sense if total elements is divisible by 256
-             and not (data.shape[-1] / 256).is_integer() # Only need to rearrange if the last dimension is not divisible by 256
-         ):
-             orig_shape = data.shape
-             data = data.reshape(n_params // 256, 256)
-             writer.add_array(f"comfy.gguf.orig_shape.{key}", tuple(int(dim) for dim in orig_shape))
-
-         try:
-             data = gguf.quants.quantize(data, data_qtype)
-         except (AttributeError, gguf.QuantError) as e:
-             tqdm.write(f"falling back to F16: {e}")
-             data_qtype = gguf.GGMLQuantizationType.F16
-             data = gguf.quants.quantize(data, data_qtype)
-
-         new_name = key # do we need to rename?
-
-         shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
-         tqdm.write(f"{f'%-{max_name_len + 4}s' % f'{new_name}'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
-
-         writer.add_tensor(new_name, data, raw_dtype=data_qtype)
-
- if __name__ == "__main__":
-     args = parse_args()
-     path = args.src
-     writer, state_dict, model_arch = load_model(path)
-
-     writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
-     if next(iter(state_dict.values())).dtype == torch.bfloat16:
-         out_path = f"{os.path.splitext(path)[0]}-BF16.gguf"
-         writer.add_file_type(gguf.LlamaFileType.MOSTLY_BF16)
-     else:
-         out_path = f"{os.path.splitext(path)[0]}-F16.gguf"
-         writer.add_file_type(gguf.LlamaFileType.MOSTLY_F16)
-
-     out_path = args.dst or out_path
-     if os.path.isfile(out_path):
-         input("Output exists enter to continue or ctrl+c to abort!")
-
-     handle_tensors(path, writer, state_dict, model_arch)
-     writer.write_header_to_file(path=out_path)
-     writer.write_kv_data_to_file()
-     writer.write_tensors_to_file(progress=True)
-     writer.close()
 
+ # (c) City96 || Apache-2.0 (apache.org/licenses/LICENSE-2.0)
+ import os
+ import torch
+ import gguf # This needs to be the llama.cpp one specifically!
+ import argparse
+ from tqdm import tqdm
+
+ from safetensors.torch import load_file
+
+ QUANTIZATION_THRESHOLD = 1024
+ REARRANGE_THRESHOLD = 512
+ MAX_TENSOR_NAME_LENGTH = 127
+
+ class ModelTemplate:
+     arch = "invalid" # string describing architecture
+     shape_fix = False # whether to reshape tensors
+     keys_detect = [] # list of lists to match in state dict
+     keys_banned = [] # list of keys that should mark model as invalid for conversion
+
+ class ModelFlux(ModelTemplate):
+     arch = "flux"
+     keys_detect = [
+         ("transformer_blocks.0.attn.norm_added_k.weight",),
+         ("double_blocks.0.img_attn.proj.weight",),
+     ]
+     keys_banned = ["transformer_blocks.0.attn.norm_added_k.weight",]
+
+ class ModelSD3(ModelTemplate):
+     arch = "sd3"
+     keys_detect = [
+         ("transformer_blocks.0.attn.add_q_proj.weight",),
+         ("joint_blocks.0.x_block.attn.qkv.weight",),
+     ]
+     keys_banned = ["transformer_blocks.0.attn.add_q_proj.weight",]
+
+ class ModelAura(ModelTemplate):
+     arch = "aura"
+     keys_detect = [
+         ("double_layers.3.modX.1.weight",),
+         ("joint_transformer_blocks.3.ff_context.out_projection.weight",),
+     ]
+     keys_banned = ["joint_transformer_blocks.3.ff_context.out_projection.weight",]
+
+ class ModelLTXV(ModelTemplate):
+     arch = "ltxv"
+     keys_detect = [
+         (
+             "adaln_single.emb.timestep_embedder.linear_2.weight",
+             "transformer_blocks.27.scale_shift_table",
+             "caption_projection.linear_2.weight",
+         )
+     ]
+
+ class ModelSDXL(ModelTemplate):
+     arch = "sdxl"
+     shape_fix = True
+     keys_detect = [
+         ("down_blocks.0.downsamplers.0.conv.weight", "add_embedding.linear_1.weight",),
+         (
+             "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight",
+             "output_blocks.2.2.conv.weight", "output_blocks.5.2.conv.weight",
+         ), # Non-diffusers
+         ("label_emb.0.0.weight",),
+     ]
+
+ class ModelSD1(ModelTemplate):
+     arch = "sd1"
+     shape_fix = True
+     keys_detect = [
+         ("down_blocks.0.downsamplers.0.conv.weight",),
+         (
+             "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight", "input_blocks.9.0.op.weight",
+             "output_blocks.2.1.conv.weight", "output_blocks.5.2.conv.weight", "output_blocks.8.2.conv.weight"
+         ), # Non-diffusers
+     ]
+
+ class ModelClipG(ModelTemplate):
+     arch = "clip_g"
+     keys_detect = [
+         ("text_model.encoder.layers.22.ln_final.bias",), # Example key, adjust as needed
+         (
+             "text_model.encoder.layers.0.attn.in_proj_weight", # Example key, adjust as needed
+             "text_model.encoder.layers.1.attn.in_proj_weight", # Example key, adjust as needed
+         ),
+     ]
+     keys_banned = [] # Add any banned keys if necessary
+
+ # The architectures are checked in order and the first successful match terminates the search.
+ arch_list = [ModelFlux, ModelSD3, ModelAura, ModelLTXV, ModelSDXL, ModelSD1, ModelClipG]
+
+ def is_model_arch(model, state_dict):
+     # check if model is correct
+     matched = False
+     invalid = False
+     for match_list in model.keys_detect:
+         print(f"Checking match list: {match_list}")
+         if all(key in state_dict for key in match_list):
+             print(f"Match found for {match_list}")
+             matched = True
+             invalid = any(key in state_dict for key in model.keys_banned)
+             break
+     assert not invalid, "Model architecture not allowed for conversion! (i.e. reference VS diffusers format)"
+     return matched
+
+ def detect_arch(state_dict):
+     model_arch = None
+     for arch in arch_list:
+         if is_model_arch(arch, state_dict):
+             model_arch = arch
+             break
+     assert model_arch is not None, "Unknown model architecture!"
+     return model_arch
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Generate F16 GGUF files from single UNET")
+     parser.add_argument("--src", required=True, help="Source model ckpt file.")
+     parser.add_argument("--dst", help="Output unet gguf file.")
+     args = parser.parse_args()
+
+     if not os.path.isfile(args.src):
+         parser.error("No input provided!")
+
+     return args
+
+ def load_state_dict(path):
+     if any(path.endswith(x) for x in [".ckpt", ".pt", ".bin", ".pth"]):
+         state_dict = torch.load(path, map_location="cpu", weights_only=True)
+         state_dict = state_dict.get("model", state_dict)
+     else:
+         state_dict = load_file(path)
+
+     # only keep unet with no prefix!
+     prefix = None
+     for pfx in ["model.diffusion_model.", "model."]:
+         if any([x.startswith(pfx) for x in state_dict.keys()]):
+             prefix = pfx
+             break
+
+     sd = {}
+     for k, v in state_dict.items():
+         if prefix and prefix not in k:
+             continue
+         if prefix:
+             k = k.replace(prefix, "")
+         sd[k] = v
+
+     return sd
+
+ def load_model(path):
+     state_dict = load_state_dict(path)
+     model_arch = detect_arch(state_dict)
+     print(f"* Architecture detected from input: {model_arch.arch}")
+     writer = gguf.GGUFWriter(path=None, arch=model_arch.arch)
+     return (writer, state_dict, model_arch)
+
+ def handle_tensors(args, writer, state_dict, model_arch):
+     name_lengths = tuple(sorted(
+         ((key, len(key)) for key in state_dict.keys()),
+         key=lambda item: item[1],
+         reverse=True,
+     ))
+     if not name_lengths:
+         return
+     max_name_len = name_lengths[0][1]
+     if max_name_len > MAX_TENSOR_NAME_LENGTH:
+         bad_list = ", ".join(f"{key!r} ({namelen})" for key, namelen in name_lengths if namelen > MAX_TENSOR_NAME_LENGTH)
+         raise ValueError(f"Can only handle tensor names up to {MAX_TENSOR_NAME_LENGTH} characters. Tensors exceeding the limit: {bad_list}")
+     for key, data in tqdm(state_dict.items()):
+         old_dtype = data.dtype
+
+         if data.dtype == torch.bfloat16:
+             data = data.to(torch.float32).numpy()
+         # this is so we don't break torch 2.0.X
+         elif data.dtype in [getattr(torch, "float8_e4m3fn", "_invalid"), getattr(torch, "float8_e5m2", "_invalid")]:
+             data = data.to(torch.float16).numpy()
+         else:
+             data = data.numpy()
+
+         n_dims = len(data.shape)
+         data_shape = data.shape
+         data_qtype = getattr(
+             gguf.GGMLQuantizationType,
+             "BF16" if old_dtype == torch.bfloat16 else "F16"
+         )
+
+         # get number of parameters (AKA elements) in this tensor
+         n_params = 1
+         for dim_size in data_shape:
+             n_params *= dim_size
+
+         # keys to keep as max precision
+         blacklist = {
+             "time_embedding.",
+             "add_embedding.",
+             "time_in.",
+             "txt_in.",
+             "vector_in.",
+             "img_in.",
+             "guidance_in.",
+             "final_layer.",
+         }
+
+         if old_dtype in (torch.float32, torch.bfloat16):
+             if n_dims == 1:
+                 # one-dimensional tensors should be kept in F32
+                 # also speeds up inference due to not dequantizing
+                 data_qtype = gguf.GGMLQuantizationType.F32
+
+             elif n_params <= QUANTIZATION_THRESHOLD:
+                 # very small tensors
+                 data_qtype = gguf.GGMLQuantizationType.F32
+
+             elif ".weight" in key and any(x in key for x in blacklist):
+                 data_qtype = gguf.GGMLQuantizationType.F32
+
+         if (model_arch.shape_fix # NEVER reshape for models such as flux
+             and n_dims > 1 # Skip one-dimensional tensors
+             and n_params >= REARRANGE_THRESHOLD # Only rearrange tensors meeting the size requirement
+             and (n_params / 256).is_integer() # Rearranging only makes sense if total elements is divisible by 256
+             and not (data.shape[-1] / 256).is_integer() # Only need to rearrange if the last dimension is not divisible by 256
+         ):
+             orig_shape = data.shape
+             data = data.reshape(n_params // 256, 256)
+             writer.add_array(f"comfy.gguf.orig_shape.{key}", tuple(int(dim) for dim in orig_shape))
+
+         try:
+             data = gguf.quants.quantize(data, data_qtype)
+         except (AttributeError, gguf.QuantError) as e:
+             tqdm.write(f"falling back to F16: {e}")
+             data_qtype = gguf.GGMLQuantizationType.F16
+             data = gguf.quants.quantize(data, data_qtype)
+
+         new_name = key # do we need to rename?
+
+         shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
+         tqdm.write(f"{f'%-{max_name_len + 4}s' % f'{new_name}'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
+
+         writer.add_tensor(new_name, data, raw_dtype=data_qtype)
+
+ if __name__ == "__main__":
+     args = parse_args()
+     path = args.src
+     writer, state_dict, model_arch = load_model(path)
+
+     writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
+     if next(iter(state_dict.values())).dtype == torch.bfloat16:
+         out_path = f"{os.path.splitext(path)[0]}-BF16.gguf"
+         writer.add_file_type(gguf.LlamaFileType.MOSTLY_BF16)
+     else:
+         out_path = f"{os.path.splitext(path)[0]}-F16.gguf"
+         writer.add_file_type(gguf.LlamaFileType.MOSTLY_F16)
+
+     out_path = args.dst or out_path
+     if os.path.isfile(out_path):
+         input("Output exists enter to continue or ctrl+c to abort!")
+
+     handle_tensors(path, writer, state_dict, model_arch)
+     writer.write_header_to_file(path=out_path)
+     writer.write_kv_data_to_file()
+     writer.write_tensors_to_file(progress=True)
+     writer.close()
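
For a quick sanity check of the updated clip_g detection keys, here is a minimal sketch (hypothetical: the dummy tensor names and shapes are illustrative only, and it assumes convert_mod.py is importable from the working directory). It builds a bare state dict carrying the new keys and confirms that detect_arch resolves it to clip_g; the templates ahead of it in arch_list (flux, sd3, aura, ltxv, sdxl, sd1) fail to match because none of their key tuples are present.

# Hypothetical smoke test for the updated clip_g detection keys.
# Assumes convert_mod.py is importable; the tensor values are dummies,
# since detection only inspects the key names, not the data.
import torch
from convert_mod import detect_arch

dummy_sd = {
    "text_model.encoder.layers.22.ln_final.bias": torch.zeros(1280),
    "text_model.encoder.layers.0.attn.in_proj_weight": torch.zeros(3840, 1280),
    "text_model.encoder.layers.1.attn.in_proj_weight": torch.zeros(3840, 1280),
}

print(detect_arch(dummy_sd).arch)  # -> "clip_g"

A full conversion is driven from the command line, e.g. python convert_mod.py --src model.safetensors, which writes model-F16.gguf (or model-BF16.gguf when the first tensor in the state dict is bfloat16) next to the input unless --dst overrides the output path.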