# File size: 1,565 Bytes
# 29e93ec 29fbb6a 1a6ab32 29e93ec 1a6ab32 29e93ec 1a6ab32 29e93ec 6eaa88c 1a6ab32 6eaa88c 1a6ab32 29e93ec 1a6ab32 29e93ec 1a6ab32 29e93ec 1a6ab32 29e93ec 1a6ab32 29e93ec 1a6ab32 29e93ec 1a6ab32 6eaa88c 1a6ab32 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# Package-wide settings.
[general]
# Name of the package this configuration describes.
name = "moe"
# NOTE(review): presumably `false` marks the package as containing compiled,
# platform-specific code rather than being pure Python — confirm against the
# build tool's documentation.
universal = false
# Settings for the Torch extension part of the package: the binding sources
# under torch-ext/ plus the shared scalar-type header from core/.
[torch]
# Include directories; "." is the directory this file's paths are relative to.
include = ["."]
# NOTE(review): presumably the file extensions of Python-side files that get
# packaged alongside the extension — confirm against the build tool's docs.
pyext = [
"py",
"json",
]
# Source and header files that make up the binding layer.
src = [
"core/scalar_type.hpp",
"torch-ext/torch_binding.cpp",
"torch-ext/torch_binding.h",
]
# Kernel section "moe-marlin": sources under marlin-moe/ plus shared headers
# from core/.
[kernel.moe-marlin]
backend = "cuda"
# Compute capabilities listed for this kernel (8.0 is the lowest entry).
# This is the only kernel section in this file that sets the key.
# NOTE(review): presumably restricts which GPU architectures this kernel is
# compiled for — confirm against the build tool's docs.
cuda-capabilities = [
"8.0",
"8.6",
"8.7",
"8.9",
"9.0",
"10.0",
"10.1",
"12.0",
]
depends = ["torch"]
# Include directories; "." is the directory this file's paths are relative to.
include = ["."]
# Headers and translation units for this kernel. One .cu/.h pair is listed per
# kernel variant (ku4, ku4b8, ku8b128), along with the common
# marlin_moe_kernel.h header and the marlin_moe_ops.cu source.
src = [
"core/exception.hpp",
"core/scalar_type.hpp",
"marlin-moe/marlin_moe_ops.cu",
"marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.cu",
"marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.cu",
"marlin-moe/marlin_kernels/marlin_moe_kernel.h",
"marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.h",
"marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.h",
"marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.cu",
"marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.h",
]
# Kernel section "activation": all listed files live under activation/.
# NOTE(review): unlike [kernel.moe-marlin] and [kernel.fp8], this section sets
# no `include` key; presumably the sources only include headers relative to
# their own directory — confirm.
[kernel.activation]
backend = "cuda"
depends = ["torch"]
# One CUDA translation unit plus the two helper headers kept next to it.
src = [
"activation/activation_kernels.cu",
"activation/cuda_compat.h",
"activation/dispatch_utils.h",
]
# Kernel section "fp8": sources under fp8/ plus two helper headers located at
# the root of the source tree.
[kernel.fp8]
backend = "cuda"
depends = ["torch"]
# Include directories; "." is the directory this file's paths are relative to.
include = ["."]
# NOTE(review): the fp8/amd/hip_float8*.h headers are listed even though the
# backend is "cuda"; presumably they are pulled in by the common sources under
# a preprocessor guard — confirm.
src = [
"cuda_compat.h",
"dispatch_utils.h",
"fp8/amd/hip_float8.h",
"fp8/amd/hip_float8_impl.h",
"fp8/common.cu",
"fp8/common.cuh",
"fp8/vectorization.cuh",
]
# Kernel section "moe": sources under moe/ plus two helper headers located at
# the root of the source tree.
# NOTE(review): this section lists the same root-level headers as [kernel.fp8]
# (cuda_compat.h, dispatch_utils.h) but, unlike that section, sets no
# `include = ["."]` — confirm the sources resolve those headers without it.
[kernel.moe]
backend = "cuda"
depends = ["torch"]
# Three CUDA translation units and the headers they are listed with.
src = [
"cuda_compat.h",
"dispatch_utils.h",
"moe/moe_align_sum_kernels.cu",
"moe/moe_wna16.cu",
"moe/moe_wna16_utils.h",
"moe/topk_softmax_kernels.cu",
]
# |