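# Build configuration for the paged_attention kernel extension
# (assumed to be a kernel-builder-style build.toml).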
[general]
version = "0.0.1"

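# Torch binding layer: C++/header sources that register the kernels with
# PyTorch. `name` is the extension name; `pyroot` presumably points at the
# directory containing the Python-side package code.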
[torch]
name = "paged_attention"
src = [
  "torch-ext/registration.h",
  "torch-ext/torch_binding.cpp",
  "torch-ext/torch_binding.h"
]
pyroot = "torch-ext"

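# Standalone CUDA utility kernel with no extra dependencies. `capabilities`
# lists the target CUDA compute capabilities (7.0 Volta through 9.0 Hopper),
# for which the sources are presumably compiled.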
[kernel.cuda_utils]
capabilities = [ "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0" ]
src = [
  "cuda-utils/cuda_utils_kernels.cu",
]
depends = []

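# Paged-attention kernel: attention (v1/v2) and KV-cache CUDA sources plus
# fp8 quantization helpers for both AMD (HIP) and NVIDIA. `include = [ "." ]`
# adds the repository root to the include path; `depends = [ "torch" ]`
# presumably makes the Torch headers/libraries available at build time.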
[kernel.paged_attention]
capabilities = [ "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0" ]
src = [
  "paged-attention/attention/attention_dtypes.h",
  "paged-attention/attention/attention_generic.cuh",
  "paged-attention/attention/attention_kernels.cuh",
  "paged-attention/attention/attention_utils.cuh",
  "paged-attention/attention/dtype_bfloat16.cuh",
  "paged-attention/attention/dtype_float16.cuh",
  "paged-attention/attention/dtype_float32.cuh",
  "paged-attention/attention/dtype_fp8.cuh",
  "paged-attention/attention/paged_attention_v1.cu",
  "paged-attention/attention/paged_attention_v2.cu",
  "paged-attention/cache_kernels.cu",
  "paged-attention/cuda_compat.h",
  "paged-attention/dispatch_utils.h",
  "paged-attention/quantization/fp8/amd/hip_float8.h",
  "paged-attention/quantization/fp8/amd/hip_float8_impl.h",
  "paged-attention/quantization/fp8/amd/quant_utils.cuh",
  "paged-attention/quantization/fp8/nvidia/quant_utils.cuh",
]
include = [ "." ]
depends = [ "torch" ]