{
  "dfloat11_config": {
    "bytes_per_thread": 8,
    "pattern_dict": {
      "transformer_blocks.\\d+": [
        "norm1.linear",
        "norm1_context.linear",
        "attn.to_q",
        "attn.to_k",
        "attn.to_v",
        "attn.add_k_proj",
        "attn.add_v_proj",
        "attn.add_q_proj",
        "attn.to_out.0",
        "attn.to_add_out",
        "ff.net.0.proj",
        "ff.net.2",
        "ff_context.net.0.proj",
        "ff_context.net.2"
      ],
      "single_transformer_blocks.\\d+": [
        "norm.linear",
        "proj_mlp",
        "proj_out",
        "attn.to_q",
        "attn.to_k",
        "attn.to_v"
      ]
    },
    "threads_per_block": [
      512
    ],
    "version": "0.2.0"
  },
  "model_type": "llama"
}