Diego Devesa committed on
Commit
f8795d3
·
1 Parent(s): a29a2c3

ggml : add SSE 4.2 and x64 base variant for CPUs without AVX (llama/12871)

Browse files

* ggml : add SSE 4.2 variant for CPUs without AVX

* ggml : add x64 base ABI variant

ggml/CMakeLists.txt CHANGED
@@ -107,6 +107,7 @@ message(DEBUG "INS_ENB : ${INS_ENB}")
107
  option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
108
  option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
109
  option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
 
110
  option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
111
  option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
112
  option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
 
107
  option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
108
  option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
109
  option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
110
+ option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
111
  option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
112
  option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
113
  option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
ggml/src/CMakeLists.txt CHANGED
@@ -267,6 +267,7 @@ function(ggml_add_cpu_backend_variant tag_name)
267
  set(GGML_CPU_TAG_NAME ${tag_name})
268
  # other: OPENMP LLAMAFILE CPU_HBM
269
  foreach (feat NATIVE
 
270
  AVX AVX2 BMI2 AVX_VNNI FMA F16C
271
  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
272
  AMX_TILE AMX_INT8 AMX_BF16)
@@ -286,14 +287,16 @@ if (GGML_CPU_ALL_VARIANTS)
286
  if (NOT GGML_BACKEND_DL)
287
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
288
  endif()
289
- ggml_add_cpu_backend_variant(sandybridge AVX)
290
- ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
291
- ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
292
- ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
293
- ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
 
 
294
  if (NOT MSVC)
295
  # MSVC doesn't support AMX
296
- ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
297
  endif()
298
  elseif (GGML_CPU)
299
  ggml_add_cpu_backend_variant_impl("")
 
267
  set(GGML_CPU_TAG_NAME ${tag_name})
268
  # other: OPENMP LLAMAFILE CPU_HBM
269
  foreach (feat NATIVE
270
+ SSE42
271
  AVX AVX2 BMI2 AVX_VNNI FMA F16C
272
  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
273
  AMX_TILE AMX_INT8 AMX_BF16)
 
287
  if (NOT GGML_BACKEND_DL)
288
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
289
  endif()
290
+ ggml_add_cpu_backend_variant(x64)
291
+ ggml_add_cpu_backend_variant(sse42 SSE42)
292
+ ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
293
+ ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
294
+ ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
295
+ ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
296
+ ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
297
  if (NOT MSVC)
298
  # MSVC doesn't support AMX
299
+ ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
300
  endif()
301
  elseif (GGML_CPU)
302
  ggml_add_cpu_backend_variant_impl("")
ggml/src/ggml-cpu/CMakeLists.txt CHANGED
@@ -222,7 +222,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
222
  elseif (GGML_AVX)
223
  list(APPEND ARCH_FLAGS /arch:AVX)
224
  list(APPEND ARCH_DEFINITIONS GGML_AVX)
225
- else ()
226
  list(APPEND ARCH_FLAGS /arch:SSE4.2)
227
  list(APPEND ARCH_DEFINITIONS GGML_SSE42)
228
  endif()
@@ -237,8 +237,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
237
  if (GGML_NATIVE)
238
  list(APPEND ARCH_FLAGS -march=native)
239
  else ()
240
- list(APPEND ARCH_FLAGS -msse4.2)
241
- list(APPEND ARCH_DEFINITIONS GGML_SSE42)
 
 
242
  if (GGML_F16C)
243
  list(APPEND ARCH_FLAGS -mf16c)
244
  list(APPEND ARCH_DEFINITIONS GGML_F16C)
 
222
  elseif (GGML_AVX)
223
  list(APPEND ARCH_FLAGS /arch:AVX)
224
  list(APPEND ARCH_DEFINITIONS GGML_AVX)
225
+ elseif (GGML_SSE42)
226
  list(APPEND ARCH_FLAGS /arch:SSE4.2)
227
  list(APPEND ARCH_DEFINITIONS GGML_SSE42)
228
  endif()
 
237
  if (GGML_NATIVE)
238
  list(APPEND ARCH_FLAGS -march=native)
239
  else ()
240
+ if (GGML_SSE42)
241
+ list(APPEND ARCH_FLAGS -msse4.2)
242
+ list(APPEND ARCH_DEFINITIONS GGML_SSE42)
243
+ endif()
244
  if (GGML_F16C)
245
  list(APPEND ARCH_FLAGS -mf16c)
246
  list(APPEND ARCH_DEFINITIONS GGML_F16C)
ggml/src/ggml-cpu/cpu-feats-x86.cpp CHANGED
@@ -263,7 +263,7 @@ void test_x86_is() {
263
  static int ggml_backend_cpu_x86_score() {
264
  // FIXME: this does not check for OS support
265
 
266
- int score = 0;
267
  cpuid_x86 is;
268
 
269
  #ifdef GGML_FMA
 
263
  static int ggml_backend_cpu_x86_score() {
264
  // FIXME: this does not check for OS support
265
 
266
+ int score = 1;
267
  cpuid_x86 is;
268
 
269
  #ifdef GGML_FMA