Diego Devesa Adrien Gallouët committed on
Commit
e58e7a9
·
1 Parent(s): 8d0f0ac

ggml : fix arm build (llama/10890)

Browse files

* ggml: GGML_NATIVE uses -mcpu=native on ARM

Signed-off-by: Adrien Gallouët <[email protected]>

* ggml: Show detected features with GGML_NATIVE

Signed-off-by: Adrien Gallouët <[email protected]>

* remove msvc support, add GGML_CPU_ARM_ARCH option

* disable llamafile in android example

* march -> mcpu, skip adding feature macros

ggml-ci

---------

Signed-off-by: Adrien Gallouët <[email protected]>
Co-authored-by: Adrien Gallouët <[email protected]>

ggml/CMakeLists.txt CHANGED
@@ -74,10 +74,10 @@ if (NOT GGML_CUDA_GRAPHS_DEFAULT)
74
  endif()
75
 
76
  # general
77
- option(GGML_STATIC "ggml: static link libraries" OFF)
78
- option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
79
- option(GGML_LTO "ggml: enable link time optimization" OFF)
80
- option(GGML_CCACHE "ggml: use ccache if available" ON)
81
 
82
  # debug
83
  option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
@@ -120,8 +120,9 @@ endif()
120
  option(GGML_LASX "ggml: enable lasx" ON)
121
  option(GGML_LSX "ggml: enable lsx" ON)
122
  option(GGML_RVV "ggml: enable rvv" ON)
123
- option(GGML_SVE "ggml: enable SVE" OFF)
124
  option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
 
125
 
126
 
127
  if (WIN32)
 
74
  endif()
75
 
76
  # general
77
+ option(GGML_STATIC "ggml: static link libraries" OFF)
78
+ option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT})
79
+ option(GGML_LTO "ggml: enable link time optimization" OFF)
80
+ option(GGML_CCACHE "ggml: use ccache if available" ON)
81
 
82
  # debug
83
  option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
 
120
  option(GGML_LASX "ggml: enable lasx" ON)
121
  option(GGML_LSX "ggml: enable lsx" ON)
122
  option(GGML_RVV "ggml: enable rvv" ON)
123
+
124
  option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
125
+ set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
126
 
127
 
128
  if (WIN32)
ggml/src/ggml-cpu/CMakeLists.txt CHANGED
@@ -74,112 +74,77 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
74
 
75
  if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
76
  CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
77
- (NOT CMAKE_OSX_ARCHITECTURES AND
78
- NOT CMAKE_GENERATOR_PLATFORM_LWR AND
79
  CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
80
 
81
  message(STATUS "ARM detected")
82
 
83
- if (MSVC)
84
- list(APPEND ARCH_DEFINITIONS __aarch64__) # MSVC defines _M_ARM64 instead
85
- list(APPEND ARCH_DEFINITIONS __ARM_NEON)
86
- list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_FMA)
87
-
88
- set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
89
- string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
90
-
91
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
92
- if (GGML_COMPILER_SUPPORT_DOTPROD)
93
- list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
94
-
95
- message(STATUS "ARM feature DOTPROD enabled")
96
- endif ()
97
-
98
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
99
-
100
- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
101
- list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
102
-
103
- message(STATUS "ARM feature MATMUL_INT8 enabled")
104
- endif ()
105
-
106
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
107
- if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
108
- list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
109
-
110
- message(STATUS "ARM feature FP16_VECTOR_ARITHMETIC enabled")
111
- endif ()
112
 
113
- set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
114
- elseif (APPLE)
115
  if (GGML_NATIVE)
116
- set(USER_PROVIDED_MARCH FALSE)
117
- foreach(flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_REQUIRED_FLAGS)
118
- if ("${${flag_var}}" MATCHES "-march=[a-zA-Z0-9+._-]+")
119
- set(USER_PROVIDED_MARCH TRUE)
120
- break()
121
- endif()
122
- endforeach()
123
-
124
- if (NOT USER_PROVIDED_MARCH)
125
- set(MARCH_FLAGS "-march=armv8.2a")
126
-
127
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
128
- if (GGML_COMPILER_SUPPORT_DOTPROD)
129
- set(MARCH_FLAGS "${MARCH_FLAGS}+dotprod")
130
- list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
131
 
132
- message(STATUS "ARM feature DOTPROD enabled")
133
- endif ()
134
 
135
- set(TEST_I8MM_FLAGS "-march=armv8.2a+i8mm")
 
136
 
137
- set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
138
- set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${TEST_I8MM_FLAGS}")
139
 
140
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
141
- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
142
- set(MARCH_FLAGS "${MARCH_FLAGS}+i8mm")
143
- list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
 
 
 
144
 
145
- message(STATUS "ARM feature MATMUL_INT8 enabled")
146
- endif ()
 
 
 
 
 
147
 
148
- set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
149
 
150
- list(APPEND ARCH_FLAGS "${MARCH_FLAGS}")
151
- endif ()
152
- endif ()
153
- else()
154
- check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
155
- if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
156
- list(APPEND ARCH_FLAGS -mfp16-format=ieee)
157
- endif()
158
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
159
- # Raspberry Pi 1, Zero
160
- list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
161
- endif()
162
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
163
- if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
164
- # Android armeabi-v7a
165
- list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
166
- else()
167
- # Raspberry Pi 2
168
- list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
169
  endif()
170
  endif()
171
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
172
- # Android arm64-v8a
173
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
174
- list(APPEND ARCH_FLAGS -mno-unaligned-access)
175
- endif()
176
- if (GGML_SVE)
177
- list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
 
 
 
 
 
 
 
 
 
 
178
  endif()
179
  endif()
180
  elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
181
  (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
182
  CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
 
 
 
183
  if (MSVC)
184
  # instruction set detection for MSVC only
185
  if (GGML_NATIVE)
 
74
 
75
  if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
76
  CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
77
+ (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
 
78
  CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
79
 
80
  message(STATUS "ARM detected")
81
 
82
+ if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
83
+ message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
84
+ else()
85
+ check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
86
+ if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
87
+ list(APPEND ARCH_FLAGS -mfp16-format=ieee)
88
+ endif()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
 
 
90
  if (GGML_NATIVE)
91
+ list(APPEND ARCH_FLAGS -mcpu=native)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
 
94
 
95
+ # -mcpu=native does not always enable all the features in some compilers,
96
+ # so we check for them manually and enable them if available
97
 
98
+ include(CheckCXXSourceRuns)
 
99
 
100
+ set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+dotprod")
101
+ check_cxx_source_runs(
102
+ "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
103
+ GGML_COMPILER_SUPPORT_DOTPROD)
104
+ if (GGML_COMPILER_SUPPORT_DOTPROD)
105
+ set(ARCH_FLAGS "${ARCH_FLAGS}+dotprod")
106
+ endif()
107
 
108
+ set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+i8mm")
109
+ check_cxx_source_runs(
110
+ "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"
111
+ GGML_COMPILER_SUPPORT_I8MM)
112
+ if (GGML_COMPILER_SUPPORT_I8MM)
113
+ set(ARCH_FLAGS "${ARCH_FLAGS}+i8mm")
114
+ endif()
115
 
116
+ set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
117
 
118
+ else()
119
+ if (GGML_CPU_ARM_ARCH)
120
+ list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  endif()
122
  endif()
123
+
124
+ # show enabled features
125
+ execute_process(
126
+ COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
127
+ INPUT_FILE "/dev/null"
128
+ OUTPUT_VARIABLE ARM_FEATURE
129
+ RESULT_VARIABLE ARM_FEATURE_RESULT
130
+ )
131
+ if (ARM_FEATURE_RESULT)
132
+ message(FATAL_ERROR "Failed to get ARM features")
133
+ else()
134
+ foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
135
+ string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
136
+ if (NOT ${feature_pos} EQUAL -1)
137
+ message(STATUS "ARM feature ${feature} enabled")
138
+ endif()
139
+ endforeach()
140
  endif()
141
  endif()
142
  elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
143
  (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
144
  CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
145
+
146
+ message(STATUS "x86 detected")
147
+
148
  if (MSVC)
149
  # instruction set detection for MSVC only
150
  if (GGML_NATIVE)
ggml/src/ggml-cpu/ggml-cpu.cpp CHANGED
@@ -522,6 +522,12 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
522
  if (ggml_cpu_has_sve()) {
523
  features.push_back({ "SVE", "1" });
524
  }
 
 
 
 
 
 
525
  if (ggml_cpu_get_sve_cnt() > 0) {
526
  static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
527
  features.push_back({ "SVE_CNT", sve_cnt.c_str() });
 
522
  if (ggml_cpu_has_sve()) {
523
  features.push_back({ "SVE", "1" });
524
  }
525
+ if (ggml_cpu_has_dotprod()) {
526
+ features.push_back({ "DOTPROD", "1" });
527
+ }
528
+ if (ggml_cpu_has_matmul_int8()) {
529
+ features.push_back({ "MATMUL_INT8", "1" });
530
+ }
531
  if (ggml_cpu_get_sve_cnt() > 0) {
532
  static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
533
  features.push_back({ "SVE_CNT", sve_cnt.c_str() });
ggml/src/ggml-cpu/llamafile/sgemm.cpp CHANGED
@@ -204,6 +204,7 @@ template <> inline float32x4_t load(const float *p) {
204
  return vld1q_f32(p);
205
  }
206
  #if !defined(_MSC_VER)
 
207
  template <> inline float16x8_t load(const ggml_fp16_t *p) {
208
  return vld1q_f16((const float16_t *)p);
209
  }
 
204
  return vld1q_f32(p);
205
  }
206
  #if !defined(_MSC_VER)
207
+ // FIXME: this should check for __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
208
  template <> inline float16x8_t load(const ggml_fp16_t *p) {
209
  return vld1q_f16((const float16_t *)p);
210
  }