Spaces:
Sleeping
Sleeping
Diego Devesa
committed on
Commit
·
f8795d3
1
Parent(s):
a29a2c3
ggml : add SSE 4.2 and x64 base variant for CPUs without AVX (llama/12871)
Browse files
* ggml : add SSE 4.2 variant for CPUs without AVX
* ggml : add x64 base ABI variant
ggml/CMakeLists.txt
CHANGED
|
@@ -107,6 +107,7 @@ message(DEBUG "INS_ENB : ${INS_ENB}")
|
|
| 107 |
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
|
| 108 |
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
|
| 109 |
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
|
|
|
|
| 110 |
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
|
| 111 |
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
|
| 112 |
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
|
|
|
|
| 107 |
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
|
| 108 |
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
|
| 109 |
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
|
| 110 |
+
option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
|
| 111 |
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
|
| 112 |
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
|
| 113 |
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
|
ggml/src/CMakeLists.txt
CHANGED
|
@@ -267,6 +267,7 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
| 267 |
set(GGML_CPU_TAG_NAME ${tag_name})
|
| 268 |
# other: OPENMP LLAMAFILE CPU_HBM
|
| 269 |
foreach (feat NATIVE
|
|
|
|
| 270 |
AVX AVX2 BMI2 AVX_VNNI FMA F16C
|
| 271 |
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
|
| 272 |
AMX_TILE AMX_INT8 AMX_BF16)
|
|
@@ -286,14 +287,16 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
| 286 |
if (NOT GGML_BACKEND_DL)
|
| 287 |
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
|
| 288 |
endif()
|
| 289 |
-
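ggml_add_cpu_backend_variant(sandybridge AVX)
|
| 290 |
-
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
|
| 291 |
-
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
|
| 292 |
-
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
| 293 |
-
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)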
|
|
|
|
|
|
|
| 294 |
if (NOT MSVC)
|
| 295 |
# MSVC doesn't support AMX
|
| 296 |
-
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
| 297 |
endif()
|
| 298 |
elseif (GGML_CPU)
|
| 299 |
ggml_add_cpu_backend_variant_impl("")
|
|
|
|
| 267 |
set(GGML_CPU_TAG_NAME ${tag_name})
|
| 268 |
# other: OPENMP LLAMAFILE CPU_HBM
|
| 269 |
foreach (feat NATIVE
|
| 270 |
+
SSE42
|
| 271 |
AVX AVX2 BMI2 AVX_VNNI FMA F16C
|
| 272 |
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
|
| 273 |
AMX_TILE AMX_INT8 AMX_BF16)
|
|
|
|
| 287 |
if (NOT GGML_BACKEND_DL)
|
| 288 |
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
|
| 289 |
endif()
|
| 290 |
+
ggml_add_cpu_backend_variant(x64)
|
| 291 |
+
ggml_add_cpu_backend_variant(sse42 SSE42)
|
| 292 |
+
ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
|
| 293 |
+
ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
|
| 294 |
+
ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
|
| 295 |
+
ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
| 296 |
+
ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
|
| 297 |
if (NOT MSVC)
|
| 298 |
# MSVC doesn't support AMX
|
| 299 |
+
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
| 300 |
endif()
|
| 301 |
elseif (GGML_CPU)
|
| 302 |
ggml_add_cpu_backend_variant_impl("")
|
ggml/src/ggml-cpu/CMakeLists.txt
CHANGED
|
@@ -222,7 +222,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
| 222 |
elseif (GGML_AVX)
|
| 223 |
list(APPEND ARCH_FLAGS /arch:AVX)
|
| 224 |
list(APPEND ARCH_DEFINITIONS GGML_AVX)
|
| 225 |
-
else ()
|
| 226 |
list(APPEND ARCH_FLAGS /arch:SSE4.2)
|
| 227 |
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
|
| 228 |
endif()
|
|
@@ -237,8 +237,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
| 237 |
if (GGML_NATIVE)
|
| 238 |
list(APPEND ARCH_FLAGS -march=native)
|
| 239 |
else ()
|
| 240 |
-
list(APPEND ARCH_FLAGS -msse4.2)
|
| 241 |
-
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
|
|
|
|
|
|
|
| 242 |
if (GGML_F16C)
|
| 243 |
list(APPEND ARCH_FLAGS -mf16c)
|
| 244 |
list(APPEND ARCH_DEFINITIONS GGML_F16C)
|
|
|
|
| 222 |
elseif (GGML_AVX)
|
| 223 |
list(APPEND ARCH_FLAGS /arch:AVX)
|
| 224 |
list(APPEND ARCH_DEFINITIONS GGML_AVX)
|
| 225 |
+
elseif (GGML_SSE42)
|
| 226 |
list(APPEND ARCH_FLAGS /arch:SSE4.2)
|
| 227 |
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
|
| 228 |
endif()
|
|
|
|
| 237 |
if (GGML_NATIVE)
|
| 238 |
list(APPEND ARCH_FLAGS -march=native)
|
| 239 |
else ()
|
| 240 |
+
if (GGML_SSE42)
|
| 241 |
+
list(APPEND ARCH_FLAGS -msse4.2)
|
| 242 |
+
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
|
| 243 |
+
endif()
|
| 244 |
if (GGML_F16C)
|
| 245 |
list(APPEND ARCH_FLAGS -mf16c)
|
| 246 |
list(APPEND ARCH_DEFINITIONS GGML_F16C)
|
ggml/src/ggml-cpu/cpu-feats-x86.cpp
CHANGED
|
@@ -263,7 +263,7 @@ void test_x86_is() {
|
|
| 263 |
static int ggml_backend_cpu_x86_score() {
|
| 264 |
// FIXME: this does not check for OS support
|
| 265 |
|
| 266 |
-
int score = 0;
|
| 267 |
cpuid_x86 is;
|
| 268 |
|
| 269 |
#ifdef GGML_FMA
|
|
|
|
| 263 |
static int ggml_backend_cpu_x86_score() {
|
| 264 |
// FIXME: this does not check for OS support
|
| 265 |
|
| 266 |
+
int score = 1;
|
| 267 |
cpuid_x86 is;
|
| 268 |
|
| 269 |
#ifdef GGML_FMA
|