{ "dfloat11_config": { "bytes_per_thread": 8, "pattern_dict": { "blocks\\.\\d+": [ "attn1.to_q", "attn1.to_k", "attn1.to_v", "attn1.to_out.0", "attn2.to_q", "attn2.to_k", "attn2.to_v", "attn2.to_out.0", "ffn.net.0.proj", "ffn.net.2" ] }, "threads_per_block": [ 512 ], "version": "0.2.0" }, "model_type": "llama" }