hipudding wangshuai09 committed on
Commit
3175a17
·
1 Parent(s): 5096c91

Add Ascend NPU backend (llama/6035)

Browse files

* [CANN] Add Ascend NPU backend

Ascend is a full-stack AI computing infrastructure for industry
applications and services based on Huawei Ascend processors and
software.

CANN (Compute Architecture of Neural Networks), developed by
Huawei, is a heterogeneous computing architecture for AI.

Co-authored-by: wangshuai09 <[email protected]>

* delete trailing whitespaces

* Modify the code based on review comment

* Rename LLAMA_CANN to GGML_CANN

* Make ggml-common.h private

* add ggml_cann prefix for acl funcs

* Add logging for CANN backend

* Delete Trailing whitespace

---------

Co-authored-by: wangshuai09 <[email protected]>

ggml/include/ggml.h CHANGED
@@ -753,6 +753,8 @@ extern "C" {
753
  GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
754
  GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
755
 
 
 
756
  // use this to compute the memory overhead of a tensor
757
  GGML_API size_t ggml_tensor_overhead(void);
758
 
@@ -2397,6 +2399,7 @@ extern "C" {
2397
  GGML_API int ggml_cpu_has_rpc (void);
2398
  GGML_API int ggml_cpu_has_vsx (void);
2399
  GGML_API int ggml_cpu_has_matmul_int8(void);
 
2400
 
2401
  //
2402
  // Internal types and functions exposed for tests and benchmarks
 
753
  GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
754
  GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
755
 
756
+ GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
757
+
758
  // use this to compute the memory overhead of a tensor
759
  GGML_API size_t ggml_tensor_overhead(void);
760
 
 
2399
  GGML_API int ggml_cpu_has_rpc (void);
2400
  GGML_API int ggml_cpu_has_vsx (void);
2401
  GGML_API int ggml_cpu_has_matmul_int8(void);
2402
+ GGML_API int ggml_cpu_has_cann (void);
2403
 
2404
  //
2405
  // Internal types and functions exposed for tests and benchmarks
ggml/src/CMakeLists.txt CHANGED
@@ -770,6 +770,74 @@ if (GGML_CPU_HBM)
770
  target_link_libraries(ggml PUBLIC memkind)
771
  endif()
772
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
773
  function(get_flags CCID CCVER)
774
  set(C_FLAGS "")
775
  set(CXX_FLAGS "")
@@ -1184,6 +1252,7 @@ add_library(ggml
1184
  ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1185
  ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
1186
  ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
 
1187
  ggml-aarch64.c ggml-aarch64.h
1188
  )
1189
 
 
770
  target_link_libraries(ggml PUBLIC memkind)
771
  endif()
772
 
773
+ if (GGML_CANN)
774
+ if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
775
+ set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
776
+ message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
777
+ endif()
778
+
779
+ if (CANN_INSTALL_DIR)
780
+ # Only supported on Unix-like systems (the check below tests UNIX, not Linux specifically).
781
+ if (GGML_CANN)
782
+ if (NOT UNIX)
783
+ set(GGML_CANN OFF)
784
+ message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off GGML_CANN")
785
+ endif()
786
+ endif()
787
+
788
+ # Supported platforms: x86-64, arm64
789
+ if (GGML_CANN)
790
+ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
791
+ elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
792
+ else()
793
+ set(GGML_CANN OFF)
794
+ message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off GGML_CANN")
795
+ endif()
796
+ endif()
797
+
798
+ # Set header and libs
799
+ if(GGML_CANN)
800
+ set(CANN_INCLUDE_DIRS
801
+ ${CANN_INSTALL_DIR}/include
802
+ ${CANN_INSTALL_DIR}/include/aclnn
803
+ ${CANN_INSTALL_DIR}/acllib/include
804
+ )
805
+
806
+ # TODO: find libs
807
+ link_directories(
808
+ ${CANN_INSTALL_DIR}/lib64
809
+ )
810
+
811
+ add_subdirectory(ggml-cann/kernels)
812
+ list(APPEND CANN_LIBRARIES
813
+ ascendcl
814
+ nnopbase
815
+ opapi
816
+ acl_op_compiler
817
+ ascendc_kernels
818
+ )
819
+
820
+ set(GGML_HEADERS_CANN "../include/ggml-cann.h")
821
+ file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
822
+ list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")
823
+
824
+ message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
825
+ message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
826
+
827
+ set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} )
828
+ set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
829
+ list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
830
+ endif()
831
+ else()
832
+ set(GGML_CANN OFF)
833
+ message(WARNING "CANN: Can't find CANN_INSTALL_DIR, do you forget to source set_var.sh. Turning off GGML_CANN")
834
+ endif()
835
+
836
+ if(NOT GGML_CANN)
837
+ message(WARNING "CANN: GGML_CANN is turned OFF, see above for details.")
838
+ endif()
839
+ endif()
840
+
841
  function(get_flags CCID CCVER)
842
  set(C_FLAGS "")
843
  set(CXX_FLAGS "")
 
1252
  ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1253
  ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
1254
  ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
1255
+ ${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
1256
  ggml-aarch64.c ggml-aarch64.h
1257
  )
1258
 
ggml/src/ggml-backend.c CHANGED
@@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
445
  extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
446
  ggml_backend_kompute_reg_devices();
447
  #endif
 
 
 
 
 
448
  }
449
 
450
  GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
 
445
  extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
446
  ggml_backend_kompute_reg_devices();
447
  #endif
448
+
449
+ #ifdef GGML_USE_CANN
450
+ extern GGML_CALL int ggml_backend_cann_reg_devices(void);
451
+ ggml_backend_cann_reg_devices();
452
+ #endif
453
  }
454
 
455
  GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
ggml/src/ggml.c CHANGED
@@ -3341,7 +3341,7 @@ bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tenso
3341
  }
3342
 
3343
  // check if t1 can be represented as a repetition of t0
3344
- static inline bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3345
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3346
 
3347
  return ggml_is_empty(t0) ? ggml_is_empty(t1) :
@@ -13699,6 +13699,7 @@ static void ggml_compute_forward_soft_max(
13699
  }
13700
  }
13701
 
 
13702
  // ggml_compute_forward_soft_max_back
13703
 
13704
  static void ggml_compute_forward_soft_max_back_f32(
@@ -21994,6 +21995,14 @@ int ggml_cpu_has_rpc(void) {
21994
  #endif
21995
  }
21996
 
 
 
 
 
 
 
 
 
21997
  int ggml_cpu_has_gpublas(void) {
21998
  return ggml_cpu_has_cuda() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() || ggml_cpu_has_sycl();
21999
  }
 
3341
  }
3342
 
3343
  // check if t1 can be represented as a repetition of t0
3344
+ bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3345
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3346
 
3347
  return ggml_is_empty(t0) ? ggml_is_empty(t1) :
 
13699
  }
13700
  }
13701
 
13702
+
13703
  // ggml_compute_forward_soft_max_back
13704
 
13705
  static void ggml_compute_forward_soft_max_back_f32(
 
21995
  #endif
21996
  }
21997
 
21998
+ int ggml_cpu_has_cann(void) {
21999
+ #if defined(GGML_USE_CANN)
22000
+ return 1;
22001
+ #else
22002
+ return 0;
22003
+ #endif
22004
+ }
22005
+
22006
  int ggml_cpu_has_gpublas(void) {
22007
  return ggml_cpu_has_cuda() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() || ggml_cpu_has_sycl();
22008
  }