Old-Fisherman committed
Commit e1ebdf9 · verified · 1 Parent(s): adfffe7

Rename convert_mod.py to convert_g.py

Files changed (2)
  1. convert_g.py +141 -0
  2. convert_mod.py +0 -261
convert_g.py ADDED
@@ -0,0 +1,141 @@
+ #!/usr/bin/env python3
+ import os
+ import torch
+ import gguf  # llama.cpp's specific gguf python module
+ import argparse
+ from tqdm import tqdm
+ from safetensors.torch import load_file
+
+ # Configuration constants
+ QUANTIZATION_THRESHOLD = 1024
+ MAX_TENSOR_NAME_LENGTH = 127
+
+ # Base model template class
+ class ModelTemplate:
+     arch = "invalid"
+     shape_fix = False
+     keys_detect = []
+     keys_banned = []
+
+ # Specific template for clip_g using ComfyUI standard keys
+ class ModelClipG(ModelTemplate):
+     arch = "clip_g"
+     shape_fix = False  # No rearrangement for text encoder models
+     keys_detect = [
+         ("logit_scale",),
+         ("text_model.embeddings.position_embedding.weight",),
+         ("text_model.encoder.layers.0.self_attn.in_proj_weight",),
+     ]
+     keys_banned = []
+
+ # Only clip_g in this conversion script
+ arch_list = [ModelClipG]
+
+ def is_model_arch(model, state_dict):
+     for key_tuple in model.keys_detect:
+         if all(key in state_dict for key in key_tuple):
+             # Reject checkpoints that contain banned keys
+             if any(key in state_dict for key in model.keys_banned):
+                 raise ValueError("Model architecture not allowed for conversion!")
+             return True
+     return False
+
+ def detect_arch(state_dict):
+     for model in arch_list:
+         if is_model_arch(model, state_dict):
+             return model
+     raise ValueError("Unknown model architecture!")
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Convert a clip_g model (ComfyUI standard) to GGUF")
+     parser.add_argument("--src", required=True, help="Source model file (.safetensors, .pt, etc.)")
+     parser.add_argument("--dst", help="Output GGUF file")
+     args = parser.parse_args()
+     if not os.path.isfile(args.src):
+         parser.error("No input provided!")
+     return args
+
+ def load_state_dict(path):
+     if any(path.endswith(ext) for ext in [".ckpt", ".pt", ".bin", ".pth"]):
+         state_dict = torch.load(path, map_location="cpu", weights_only=True)
+         state_dict = state_dict.get("model", state_dict)
+     else:
+         state_dict = load_file(path)
+
+     # Strip a known prefix from the keys if one is present.
+     prefix = None
+     for pfx in ["model.diffusion_model.", "model."]:
+         if any(k.startswith(pfx) for k in state_dict.keys()):
+             prefix = pfx
+             break
+     new_state = {}
+     for k, v in state_dict.items():
+         if prefix:
+             if not k.startswith(prefix):
+                 continue
+             k = k[len(prefix):]  # strip only the leading prefix, not later occurrences
+         new_state[k] = v
+     return new_state
+
+ def load_model(path):
+     state_dict = load_state_dict(path)
+     model_arch = detect_arch(state_dict)
+     print(f"Detected architecture: {model_arch.arch}")
+     writer = gguf.GGUFWriter(path=None, arch=model_arch.arch)
+     return writer, state_dict, model_arch
+
+ def handle_tensors(writer, state_dict, model_arch):
+     # Check that all tensor names are within the allowed length.
+     for key in state_dict.keys():
+         if len(key) > MAX_TENSOR_NAME_LENGTH:
+             raise ValueError(f"Tensor name {key} exceeds maximum length {MAX_TENSOR_NAME_LENGTH}")
+
+     for key, tensor in tqdm(state_dict.items(), desc="Processing tensors"):
+         if isinstance(tensor, torch.Tensor):
+             old_dtype = tensor.dtype
+             # numpy has no bfloat16, so upcast before converting
+             if tensor.dtype == torch.bfloat16:
+                 tensor = tensor.to(torch.float32)
+             data = tensor.detach().cpu().numpy()
+         else:
+             old_dtype = None
+             data = tensor
+
+         # Keep norms, biases and embeddings in full precision; store the rest
+         # as BF16/F16 to match the file type chosen in main().
+         key_lower = key.lower()
+         if data.ndim == 1 or "bias" in key_lower or "layer_norm" in key_lower or "ln_" in key_lower:
+             data_qtype = gguf.GGMLQuantizationType.F32
+         elif "embeddings" in key_lower:
+             data_qtype = gguf.GGMLQuantizationType.F32
+         elif old_dtype == torch.bfloat16:
+             data_qtype = gguf.GGMLQuantizationType.BF16
+         else:
+             data_qtype = gguf.GGMLQuantizationType.F16
+
+         # Very small tensors are not worth converting down
+         if data.size <= QUANTIZATION_THRESHOLD:
+             data_qtype = gguf.GGMLQuantizationType.F32
+
+         try:
+             quantized = gguf.quants.quantize(data, data_qtype)
+         except Exception as e:
+             tqdm.write(f"Quantization failed for {key} with error {e}; falling back to F16")
+             data_qtype = gguf.GGMLQuantizationType.F16
+             quantized = gguf.quants.quantize(data, data_qtype)
+
+         writer.add_tensor(key, quantized, raw_dtype=data_qtype)
+         tqdm.write(f"Processed {key}: {old_dtype or data.dtype} -> {data_qtype.name}, shape = {data.shape}")
+
+ def main():
+     args = parse_args()
+     writer, state_dict, model_arch = load_model(args.src)
+
+     writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
+
+     # Determine output name and file type based on the first tensor's dtype.
+     first_tensor = next(iter(state_dict.values()))
+     if first_tensor.dtype == torch.bfloat16:
+         out_path = args.dst or os.path.splitext(args.src)[0] + "-BF16.gguf"
+         writer.add_file_type(gguf.LlamaFileType.MOSTLY_BF16)
+     else:
+         out_path = args.dst or os.path.splitext(args.src)[0] + "-F16.gguf"
+         writer.add_file_type(gguf.LlamaFileType.MOSTLY_F16)
+
+     if os.path.isfile(out_path):
+         input("Output exists. Press enter to continue or Ctrl+C to abort")
+
+     handle_tensors(writer, state_dict, model_arch)
+     writer.write_header_to_file(path=out_path)
+     writer.write_kv_data_to_file()
+     writer.write_tensors_to_file(progress=True)
+     writer.close()
+
+ if __name__ == "__main__":
+     main()
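A minimal end-to-end check of the new script might look like the sketch below. It is an assumption-laden illustration, not part of the commit: `clip_g.safetensors` is a hypothetical local ComfyUI-style clip_g checkpoint, and the reader comes from the same llama.cpp gguf python module the converter imports. It runs the converter, then reopens the output with `gguf.GGUFReader` to confirm tensor names and quantization types.

    # Sketch only: file names here are illustrative assumptions.
    import subprocess
    from gguf import GGUFReader  # llama.cpp's gguf python module

    # Run the converter on a hypothetical clip_g checkpoint.
    subprocess.run(
        ["python3", "convert_g.py", "--src", "clip_g.safetensors", "--dst", "clip_g-F16.gguf"],
        check=True,
    )

    # Re-open the result and list what was written.
    reader = GGUFReader("clip_g-F16.gguf")
    for tensor in reader.tensors:
        # Each entry exposes the tensor name, shape, and GGML quantization type.
        print(tensor.name, tensor.shape, tensor.tensor_type.name)

If the conversion behaved as intended, norms, biases, and embeddings should report F32, while the large projection weights report F16 (or BF16 for bfloat16 sources).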
convert_mod.py DELETED
@@ -1,261 +0,0 @@
- # (c) City96 || Apache-2.0 (apache.org/licenses/LICENSE-2.0)
- import os
- import torch
- import gguf # This needs to be the llama.cpp one specifically!
- import argparse
- from tqdm import tqdm
-
- from safetensors.torch import load_file
-
- QUANTIZATION_THRESHOLD = 1024
- REARRANGE_THRESHOLD = 512
- MAX_TENSOR_NAME_LENGTH = 127
-
- class ModelTemplate:
-     arch = "invalid" # string describing architecture
-     shape_fix = False # whether to reshape tensors
-     keys_detect = [] # list of lists to match in state dict
-     keys_banned = [] # list of keys that should mark model as invalid for conversion
-
- class ModelFlux(ModelTemplate):
-     arch = "flux"
-     keys_detect = [
-         ("transformer_blocks.0.attn.norm_added_k.weight",),
-         ("double_blocks.0.img_attn.proj.weight",),
-     ]
-     keys_banned = ["transformer_blocks.0.attn.norm_added_k.weight",]
-
- class ModelSD3(ModelTemplate):
-     arch = "sd3"
-     keys_detect = [
-         ("transformer_blocks.0.attn.add_q_proj.weight",),
-         ("joint_blocks.0.x_block.attn.qkv.weight",),
-     ]
-     keys_banned = ["transformer_blocks.0.attn.add_q_proj.weight",]
-
- class ModelAura(ModelTemplate):
-     arch = "aura"
-     keys_detect = [
-         ("double_layers.3.modX.1.weight",),
-         ("joint_transformer_blocks.3.ff_context.out_projection.weight",),
-     ]
-     keys_banned = ["joint_transformer_blocks.3.ff_context.out_projection.weight",]
-
- class ModelLTXV(ModelTemplate):
-     arch = "ltxv"
-     keys_detect = [
-         (
-             "adaln_single.emb.timestep_embedder.linear_2.weight",
-             "transformer_blocks.27.scale_shift_table",
-             "caption_projection.linear_2.weight",
-         )
-     ]
-
- class ModelSDXL(ModelTemplate):
-     arch = "sdxl"
-     shape_fix = True
-     keys_detect = [
-         ("down_blocks.0.downsamplers.0.conv.weight", "add_embedding.linear_1.weight",),
-         (
-             "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight",
-             "output_blocks.2.2.conv.weight", "output_blocks.5.2.conv.weight",
-         ), # Non-diffusers
-         ("label_emb.0.0.weight",),
-     ]
-
- class ModelSD1(ModelTemplate):
-     arch = "sd1"
-     shape_fix = True
-     keys_detect = [
-         ("down_blocks.0.downsamplers.0.conv.weight",),
-         (
-             "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight", "input_blocks.9.0.op.weight",
-             "output_blocks.2.1.conv.weight", "output_blocks.5.2.conv.weight", "output_blocks.8.2.conv.weight"
-         ), # Non-diffusers
-     ]
-
- class ModelClipG(ModelTemplate):
-     arch = "clip_g"
-     keys_detect = [
-         ("text_model.final_layer_norm.bias",), # Example key, adjust as needed
-         (
-             "text_model.encoder.layers.0.self_attn.k_proj.weight", # Example key, adjust as needed
-             "text_model.encoder.layers.1.self_attn.q_proj.bias", # Example key, adjust as needed
-         ),
-     ]
-     keys_banned = [] # Add any banned keys if necessary
-
- # The architectures are checked in order and the first successful match terminates the search.
- arch_list = [ModelFlux, ModelSD3, ModelAura, ModelLTXV, ModelSDXL, ModelSD1, ModelClipG]
-
- def is_model_arch(model, state_dict):
-     # check if model is correct
-     matched = False
-     invalid = False
-     for match_list in model.keys_detect:
-         print(f"Checking match list: {match_list}")
-         if all(key in state_dict for key in match_list):
-             print(f"Match found for {match_list}")
-             matched = True
-             invalid = any(key in state_dict for key in model.keys_banned)
-             break
-     assert not invalid, "Model architecture not allowed for conversion! (i.e. reference VS diffusers format)"
-     return matched
-
- def detect_arch(state_dict):
-     model_arch = None
-     for arch in arch_list:
-         if is_model_arch(arch, state_dict):
-             model_arch = arch
-             break
-     assert model_arch is not None, "Unknown model architecture!"
-     return model_arch
-
- def parse_args():
-     parser = argparse.ArgumentParser(description="Generate F16 GGUF files from single UNET")
-     parser.add_argument("--src", required=True, help="Source model ckpt file.")
-     parser.add_argument("--dst", help="Output unet gguf file.")
-     args = parser.parse_args()
-
-     if not os.path.isfile(args.src):
-         parser.error("No input provided!")
-
-     return args
-
- def load_state_dict(path):
-     if any(path.endswith(x) for x in [".ckpt", ".pt", ".bin", ".pth"]):
-         state_dict = torch.load(path, map_location="cpu", weights_only=True)
-         state_dict = state_dict.get("model", state_dict)
-     else:
-         state_dict = load_file(path)
-
-     # only keep unet with no prefix!
-     prefix = None
-     for pfx in ["model.diffusion_model.", "model."]:
-         if any([x.startswith(pfx) for x in state_dict.keys()]):
-             prefix = pfx
-             break
-
-     sd = {}
-     for k, v in state_dict.items():
-         if prefix and prefix not in k:
-             continue
-         if prefix:
-             k = k.replace(prefix, "")
-         sd[k] = v
-
-     return sd
-
- def load_model(path):
-     state_dict = load_state_dict(path)
-     model_arch = detect_arch(state_dict)
-     print(f"* Architecture detected from input: {model_arch.arch}")
-     writer = gguf.GGUFWriter(path=None, arch=model_arch.arch)
-     return (writer, state_dict, model_arch)
-
- def handle_tensors(args, writer, state_dict, model_arch):
-     name_lengths = tuple(sorted(
-         ((key, len(key)) for key in state_dict.keys()),
-         key=lambda item: item[1],
-         reverse=True,
-     ))
-     if not name_lengths:
-         return
-     max_name_len = name_lengths[0][1]
-     if max_name_len > MAX_TENSOR_NAME_LENGTH:
-         bad_list = ", ".join(f"{key!r} ({namelen})" for key, namelen in name_lengths if namelen > MAX_TENSOR_NAME_LENGTH)
-         raise ValueError(f"Can only handle tensor names up to {MAX_TENSOR_NAME_LENGTH} characters. Tensors exceeding the limit: {bad_list}")
-     for key, data in tqdm(state_dict.items()):
-         old_dtype = data.dtype
-
-         if data.dtype == torch.bfloat16:
-             data = data.to(torch.float32).numpy()
-         # this is so we don't break torch 2.0.X
-         elif data.dtype in [getattr(torch, "float8_e4m3fn", "_invalid"), getattr(torch, "float8_e5m2", "_invalid")]:
-             data = data.to(torch.float16).numpy()
-         else:
-             data = data.numpy()
-
-         n_dims = len(data.shape)
-         data_shape = data.shape
-         data_qtype = getattr(
-             gguf.GGMLQuantizationType,
-             "BF16" if old_dtype == torch.bfloat16 else "F16"
-         )
-
-         # get number of parameters (AKA elements) in this tensor
-         n_params = 1
-         for dim_size in data_shape:
-             n_params *= dim_size
-
-         # keys to keep as max precision
-         blacklist = {
-             "time_embedding.",
-             "add_embedding.",
-             "time_in.",
-             "txt_in.",
-             "vector_in.",
-             "img_in.",
-             "guidance_in.",
-             "final_layer.",
-         }
-
-         if old_dtype in (torch.float32, torch.bfloat16):
-             if n_dims == 1:
-                 # one-dimensional tensors should be kept in F32
-                 # also speeds up inference due to not dequantizing
-                 data_qtype = gguf.GGMLQuantizationType.F32
-
-             elif n_params <= QUANTIZATION_THRESHOLD:
-                 # very small tensors
-                 data_qtype = gguf.GGMLQuantizationType.F32
-
-             elif ".weight" in key and any(x in key for x in blacklist):
-                 data_qtype = gguf.GGMLQuantizationType.F32
-
-         if (model_arch.shape_fix # NEVER reshape for models such as flux
-             and n_dims > 1 # Skip one-dimensional tensors
-             and n_params >= REARRANGE_THRESHOLD # Only rearrange tensors meeting the size requirement
-             and (n_params / 256).is_integer() # Rearranging only makes sense if total elements is divisible by 256
-             and not (data.shape[-1] / 256).is_integer() # Only need to rearrange if the last dimension is not divisible by 256
-         ):
-             orig_shape = data.shape
-             data = data.reshape(n_params // 256, 256)
-             writer.add_array(f"comfy.gguf.orig_shape.{key}", tuple(int(dim) for dim in orig_shape))
-
-         try:
-             data = gguf.quants.quantize(data, data_qtype)
-         except (AttributeError, gguf.QuantError) as e:
-             tqdm.write(f"falling back to F16: {e}")
-             data_qtype = gguf.GGMLQuantizationType.F16
-             data = gguf.quants.quantize(data, data_qtype)
-
-         new_name = key # do we need to rename?
-
-         shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
-         tqdm.write(f"{f'%-{max_name_len + 4}s' % f'{new_name}'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
-
-         writer.add_tensor(new_name, data, raw_dtype=data_qtype)
-
- if __name__ == "__main__":
-     args = parse_args()
-     path = args.src
-     writer, state_dict, model_arch = load_model(path)
-
-     writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
-     if next(iter(state_dict.values())).dtype == torch.bfloat16:
-         out_path = f"{os.path.splitext(path)[0]}-BF16.gguf"
-         writer.add_file_type(gguf.LlamaFileType.MOSTLY_BF16)
-     else:
-         out_path = f"{os.path.splitext(path)[0]}-F16.gguf"
-         writer.add_file_type(gguf.LlamaFileType.MOSTLY_F16)
-
-     out_path = args.dst or out_path
-     if os.path.isfile(out_path):
-         input("Output exists enter to continue or ctrl+c to abort!")
-
-     handle_tensors(path, writer, state_dict, model_arch)
-     writer.write_header_to_file(path=out_path)
-     writer.write_kv_data_to_file()
-     writer.write_tensors_to_file(progress=True)
-     writer.close()
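One detail of the deleted script that the clip_g converter intentionally drops is the shape_fix rearrangement: GGML block quantization works on 256-element rows, so UNet tensors whose total element count is divisible by 256 but whose last axis is not get flattened to (n_params // 256, 256), with the original shape stored under comfy.gguf.orig_shape.<key>. Below is a standalone sketch of just that decision, using a made-up tensor shape; it is an illustration of the deleted logic, not code from the commit.

    import numpy as np

    REARRANGE_THRESHOLD = 512  # same constant the deleted script used

    def maybe_rearrange(data: np.ndarray):
        # Mirrors the deleted shape_fix branch: flatten to 256-wide rows when
        # the total element count allows it but the last axis does not.
        n_params = data.size
        if (
            data.ndim > 1
            and n_params >= REARRANGE_THRESHOLD
            and n_params % 256 == 0
            and data.shape[-1] % 256 != 0
        ):
            return data.reshape(n_params // 256, 256), data.shape
        return data, None

    weights = np.zeros((320, 320), dtype=np.float32)  # 102400 elements, last axis 320
    flat, orig_shape = maybe_rearrange(weights)
    print(flat.shape, orig_shape)  # (400, 256) (320, 320)

Text encoder tensors like clip_g's are already 256-aligned or kept in F32, which is why the new script sets shape_fix = False and omits this path entirely.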