Spaces:
Running
Running
Commit
·
3175a17
1
Parent(s):
5096c91
Add Ascend NPU backend (llama/6035)
Browse files
* [CANN] Add Ascend NPU backend
Ascend is a full-stack AI computing infrastructure for industry
applications and services based on Huawei Ascend processors and
software.
CANN (Compute Architecture of Neural Networks), developed by
Huawei, is a heterogeneous computing architecture for AI.
Co-authored-by: wangshuai09 <[email protected]>
* delete trailing whitespaces
* Modify the code based on review comment
* Rename LLAMA_CANN to GGML_CANN
* Make ggml-common.h private
* add ggml_cann prefix for acl funcs
* Add logging for CANN backend
* Delete Trailing whitespace
---------
Co-authored-by: wangshuai09 <[email protected]>
- ggml/include/ggml.h +3 -0
- ggml/src/CMakeLists.txt +69 -0
- ggml/src/ggml-backend.c +5 -0
- ggml/src/ggml.c +10 -1
ggml/include/ggml.h
CHANGED
|
@@ -753,6 +753,8 @@ extern "C" {
|
|
| 753 |
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
| 754 |
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
| 755 |
|
|
|
|
|
|
|
| 756 |
// use this to compute the memory overhead of a tensor
|
| 757 |
GGML_API size_t ggml_tensor_overhead(void);
|
| 758 |
|
|
@@ -2397,6 +2399,7 @@ extern "C" {
|
|
| 2397 |
GGML_API int ggml_cpu_has_rpc (void);
|
| 2398 |
GGML_API int ggml_cpu_has_vsx (void);
|
| 2399 |
GGML_API int ggml_cpu_has_matmul_int8(void);
|
|
|
|
| 2400 |
|
| 2401 |
//
|
| 2402 |
// Internal types and functions exposed for tests and benchmarks
|
|
|
|
| 753 |
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
| 754 |
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
| 755 |
|
| 756 |
+
GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
| 757 |
+
|
| 758 |
// use this to compute the memory overhead of a tensor
|
| 759 |
GGML_API size_t ggml_tensor_overhead(void);
|
| 760 |
|
|
|
|
| 2399 |
GGML_API int ggml_cpu_has_rpc (void);
|
| 2400 |
GGML_API int ggml_cpu_has_vsx (void);
|
| 2401 |
GGML_API int ggml_cpu_has_matmul_int8(void);
|
| 2402 |
+
GGML_API int ggml_cpu_has_cann (void);
|
| 2403 |
|
| 2404 |
//
|
| 2405 |
// Internal types and functions exposed for tests and benchmarks
|
ggml/src/CMakeLists.txt
CHANGED
|
@@ -770,6 +770,74 @@ if (GGML_CPU_HBM)
|
|
| 770 |
target_link_libraries(ggml PUBLIC memkind)
|
| 771 |
endif()
|
| 772 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 773 |
function(get_flags CCID CCVER)
|
| 774 |
set(C_FLAGS "")
|
| 775 |
set(CXX_FLAGS "")
|
|
@@ -1184,6 +1252,7 @@ add_library(ggml
|
|
| 1184 |
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
|
| 1185 |
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
|
| 1186 |
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
|
|
|
|
| 1187 |
ggml-aarch64.c ggml-aarch64.h
|
| 1188 |
)
|
| 1189 |
|
|
|
|
| 770 |
target_link_libraries(ggml PUBLIC memkind)
|
| 771 |
endif()
|
| 772 |
|
| 773 |
+
# Ascend NPU (CANN) backend configuration.
#
# Reads:
#   GGML_CANN                 - enables this section
#   CANN_INSTALL_DIR          - root directory of the CANN toolkit
#   ENV{ASCEND_TOOLKIT_HOME}  - fallback for CANN_INSTALL_DIR when it is empty
#
# When every check passes, this section:
#   - sets CANN_INCLUDE_DIRS and appends to CANN_LIBRARIES
#   - sets GGML_HEADERS_CANN and GGML_SOURCES_CANN
#   - extends GGML_EXTRA_LIBS and GGML_EXTRA_INCLUDES
#   - appends GGML_USE_CANN to GGML_CDEF_PUBLIC
#
# When any check fails, GGML_CANN is set to OFF and a warning explains why.
if (GGML_CANN)
    # The comparison is true exactly when CANN_INSTALL_DIR expands to an empty
    # string; in that case take the toolkit root from the environment.
    if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
        set(CANN_INSTALL_DIR "$ENV{ASCEND_TOOLKIT_HOME}")
        message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
    endif()

    if (CANN_INSTALL_DIR)
        # Only Support Linux.
        # NOTE(review): UNIX is also true on macOS and other Unix-like hosts, so
        # this gate is broader than "Linux" - confirm whether that is intended.
        if (NOT UNIX)
            set(GGML_CANN OFF)
            message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off GGML_CANN")
        endif()

        # Supported platforms: x86-64, arm64
        # Skipped when the OS check above already disabled the backend, so only
        # one "Turning off" reason is reported.
        if (GGML_CANN AND
            NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND
            NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND
            NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
            set(GGML_CANN OFF)
            message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off GGML_CANN")
        endif()

        # Set header and libs
        if (GGML_CANN)
            set(CANN_INCLUDE_DIRS
                ${CANN_INSTALL_DIR}/include
                ${CANN_INSTALL_DIR}/include/aclnn
                ${CANN_INSTALL_DIR}/acllib/include
            )

            # TODO: find libs
            # Directory-scoped on purpose for now: the libraries below are
            # linked by bare name and resolved through this search path.
            link_directories(
                ${CANN_INSTALL_DIR}/lib64
            )

            # Must run before the library list is consumed; presumably this
            # subdirectory provides the ascendc_kernels library named below -
            # TODO confirm against ggml-cann/kernels/CMakeLists.txt.
            add_subdirectory(ggml-cann/kernels)
            list(APPEND CANN_LIBRARIES
                ascendcl
                nnopbase
                opapi
                acl_op_compiler
                ascendc_kernels
            )

            set(GGML_HEADERS_CANN "../include/ggml-cann.h")
            # NOTE: globbed at configure time; re-run CMake after adding or
            # removing files under ggml-cann/.
            file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
            list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")

            message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
            message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")

            set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} )
            set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
            list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
        endif()
    else()
        set(GGML_CANN OFF)
        message(WARNING "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_env.sh? Turning off GGML_CANN")
    endif()

    # Final summary so a disabled backend is visible at the end of the CANN output.
    if (NOT GGML_CANN)
        message(WARNING "CANN: GGML_CANN is turned OFF, see above for details.")
    endif()
endif()
|
| 840 |
+
|
| 841 |
function(get_flags CCID CCVER)
|
| 842 |
set(C_FLAGS "")
|
| 843 |
set(CXX_FLAGS "")
|
|
|
|
| 1252 |
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
|
| 1253 |
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
|
| 1254 |
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
|
| 1255 |
+
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
|
| 1256 |
ggml-aarch64.c ggml-aarch64.h
|
| 1257 |
)
|
| 1258 |
|
ggml/src/ggml-backend.c
CHANGED
|
@@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
|
|
| 445 |
extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
|
| 446 |
ggml_backend_kompute_reg_devices();
|
| 447 |
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
}
|
| 449 |
|
| 450 |
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
|
|
|
|
| 445 |
extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
|
| 446 |
ggml_backend_kompute_reg_devices();
|
| 447 |
#endif
|
| 448 |
+
|
| 449 |
+
#ifdef GGML_USE_CANN
|
| 450 |
+
extern GGML_CALL int ggml_backend_cann_reg_devices(void);
|
| 451 |
+
ggml_backend_cann_reg_devices();
|
| 452 |
+
#endif
|
| 453 |
}
|
| 454 |
|
| 455 |
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
|
ggml/src/ggml.c
CHANGED
|
@@ -3341,7 +3341,7 @@ bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tenso
|
|
| 3341 |
}
|
| 3342 |
|
| 3343 |
// check if t1 can be represented as a repetition of t0
|
| 3344 |
-
|
| 3345 |
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
| 3346 |
|
| 3347 |
return ggml_is_empty(t0) ? ggml_is_empty(t1) :
|
|
@@ -13699,6 +13699,7 @@ static void ggml_compute_forward_soft_max(
|
|
| 13699 |
}
|
| 13700 |
}
|
| 13701 |
|
|
|
|
| 13702 |
// ggml_compute_forward_soft_max_back
|
| 13703 |
|
| 13704 |
static void ggml_compute_forward_soft_max_back_f32(
|
|
@@ -21994,6 +21995,14 @@ int ggml_cpu_has_rpc(void) {
|
|
| 21994 |
#endif
|
| 21995 |
}
|
| 21996 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21997 |
// Returns 1 when any of the CUDA, Vulkan, Kompute or SYCL feature probes
// reports non-zero, otherwise 0. Probes are tried in that order and the first
// non-zero result ends the search.
int ggml_cpu_has_gpublas(void) {
    if (ggml_cpu_has_cuda()) {
        return 1;
    }
    if (ggml_cpu_has_vulkan()) {
        return 1;
    }
    if (ggml_cpu_has_kompute()) {
        return 1;
    }
    if (ggml_cpu_has_sycl()) {
        return 1;
    }
    return 0;
}
|
|
|
|
| 3341 |
}
|
| 3342 |
|
| 3343 |
// check if t1 can be represented as a repetition of t0
|
| 3344 |
+
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
| 3345 |
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
| 3346 |
|
| 3347 |
return ggml_is_empty(t0) ? ggml_is_empty(t1) :
|
|
|
|
| 13699 |
}
|
| 13700 |
}
|
| 13701 |
|
| 13702 |
+
|
| 13703 |
// ggml_compute_forward_soft_max_back
|
| 13704 |
|
| 13705 |
static void ggml_compute_forward_soft_max_back_f32(
|
|
|
|
| 21995 |
#endif
|
| 21996 |
}
|
| 21997 |
|
| 21998 |
+
// Reports whether this build was compiled with the CANN backend enabled:
// returns 1 when GGML_USE_CANN is defined at compile time, 0 otherwise.
int ggml_cpu_has_cann(void) {
#ifdef GGML_USE_CANN
    const int cann_enabled = 1;
#else
    const int cann_enabled = 0;
#endif
    return cann_enabled;
}
|
| 22005 |
+
|
| 22006 |
// Returns 1 when any of the CUDA, Vulkan, Kompute or SYCL feature probes
// reports non-zero, otherwise 0. Probes are tried in that order and the first
// non-zero result ends the search.
int ggml_cpu_has_gpublas(void) {
    if (ggml_cpu_has_cuda()) {
        return 1;
    }
    if (ggml_cpu_has_vulkan()) {
        return 1;
    }
    if (ggml_cpu_has_kompute()) {
        return 1;
    }
    if (ggml_cpu_has_sycl()) {
        return 1;
    }
    return 0;
}
|