{ "dfloat11_config": { "bytes_per_thread": 8, "pattern_dict": { "transformer_blocks.\\d+": [ "norm1.linear", "norm1_context.linear", "attn.to_q", "attn.to_k", "attn.to_v", "attn.add_k_proj", "attn.add_v_proj", "attn.add_q_proj", "attn.to_out.0", "attn.to_add_out", "ff.net.0.proj", "ff.net.2", "ff_context.net.0.proj", "ff_context.net.2" ], "single_transformer_blocks.\\d+": [ "norm.linear", "proj_mlp", "proj_out", "attn.to_q", "attn.to_k", "attn.to_v" ] }, "threads_per_block": [ 512 ], "version": "0.2.0" }, "model_type": "llama" }