# yamoe / build.toml
# Author: drbh
# Commit: fix: improve layer for transformer integration
# Revision: bd058af
# Package-wide build settings.
[general]
# Name of the package produced by this build.
name = "yamoe"
# NOTE(review): presumably false because this package ships compiled
# sources (see the `src` lists in the other tables) — confirm against the
# build tool's documentation.
universal = false

# Torch extension binding sources (one C++ file and its header).
[torch]
src = [
    "torch-ext/torch_binding.cpp",
    "torch-ext/torch_binding.h",
]

# Build definition for the "yamoe" kernel.
[kernel.yamoe]
backend = "cuda"

# Compute capabilities listed for this kernel. Commented-out entries are not
# part of the list; only 8.6, 8.7, 8.9 and 9.0 are active.
# NOTE(review): "11.8" does not look like a compute capability (it matches a
# CUDA toolkit version number) — confirm before re-enabling it.
cuda-capabilities = [
    # "7.0",
    # "7.2",
    # "7.5",
    # "8.0",
    "8.6",
    "8.7",
    "8.9",
    "9.0",
    # "10.0",
    # "10.1",
    # "11.8",
    # "12.0"
]

# Dependencies of this kernel.
# NOTE(review): "cutlass_3_8" presumably selects CUTLASS 3.8 — confirm against
# the build tool's list of supported dependency names.
depends = ["torch", "cutlass_3_8"]

# Kernel sources: CUDA files plus one plain C++ file (csrc/moe.cpp).
src = [
    "csrc/index_select.cu",
    "csrc/gather.cu",
    "csrc/scatter.cu",
    "csrc/sort.cu",
    "csrc/bincount_cumsum.cu",
    "csrc/batch_mm.cu",
    "csrc/moe.cpp",
    "csrc/experts_backward.cu",
]