Spaces:
Running
Running
midnight
midnight
committed on
cmake : fix compile assumptions for power9/etc (#2777)
Browse files* Add small comment re: VSX to readme
Co-authored-by: midnight <[email protected]>
- README.md +15 -1
- ggml/src/ggml-cpu/CMakeLists.txt +7 -11
README.md
CHANGED
|
@@ -17,7 +17,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
|
|
| 17 |
- Plain C/C++ implementation without dependencies
|
| 18 |
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
|
| 19 |
- AVX intrinsics support for x86 architectures
|
| 20 |
-
- VSX intrinsics support for POWER architectures
|
| 21 |
- Mixed F16 / F32 precision
|
| 22 |
- [Integer quantization support](#quantization)
|
| 23 |
- Zero memory allocations at runtime
|
|
@@ -139,6 +139,20 @@ make -j large-v3-turbo
|
|
| 139 |
| medium | 1.5 GiB | ~2.1 GB |
|
| 140 |
| large | 2.9 GiB | ~3.9 GB |
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
## Quantization
|
| 143 |
|
| 144 |
`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
|
|
|
|
| 17 |
- Plain C/C++ implementation without dependencies
|
| 18 |
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
|
| 19 |
- AVX intrinsics support for x86 architectures
|
| 20 |
+
- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
|
| 21 |
- Mixed F16 / F32 precision
|
| 22 |
- [Integer quantization support](#quantization)
|
| 23 |
- Zero memory allocations at runtime
|
|
|
|
| 139 |
| medium | 1.5 GiB | ~2.1 GB |
|
| 140 |
| large | 2.9 GiB | ~3.9 GB |
|
| 141 |
|
| 142 |
+
## POWER VSX Intrinsics
|
| 143 |
+
|
| 144 |
+
`whisper.cpp` supports POWER architectures and includes code which
|
| 145 |
+
significantly speeds up operation on Linux running on POWER9/10, making it
|
| 146 |
+
capable of faster-than-realtime transcription on an underclocked Raptor
|
| 147 |
+
Talos II. Ensure you have a BLAS package installed, and replace the
|
| 148 |
+
standard cmake setup with:
|
| 149 |
+
|
| 150 |
+
```bash
|
| 151 |
+
# build with GGML_BLAS defined
|
| 152 |
+
cmake -B build -DGGML_BLAS=1
|
| 153 |
+
cmake --build build --config Release
|
| 154 |
+
./build/bin/whisper-cli [ .. etc .. ]
|
| 155 |
+
```
|
| 156 |
## Quantization
|
| 157 |
|
| 158 |
`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
|
ggml/src/ggml-cpu/CMakeLists.txt
CHANGED
|
@@ -279,19 +279,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
| 279 |
endif()
|
| 280 |
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
| 281 |
message(STATUS "PowerPC detected")
|
| 282 |
-
execute_process(COMMAND bash -c "grep
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
if (${substring_index} GREATER_EQUAL 0)
|
| 289 |
-
list(APPEND ARCH_FLAGS -mcpu=power10)
|
| 290 |
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
| 291 |
-
|
| 292 |
else()
|
| 293 |
-
list(APPEND ARCH_FLAGS -mcpu=
|
| 294 |
-
# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
|
| 295 |
endif()
|
| 296 |
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
|
| 297 |
message(STATUS "loongarch64 detected")
|
|
|
|
| 279 |
endif()
|
| 280 |
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
| 281 |
message(STATUS "PowerPC detected")
|
| 282 |
+
execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M)
|
| 283 |
+
if (${POWER_M} MATCHES "POWER10")
|
| 284 |
+
list(APPEND ARCH_FLAGS -mcpu=power10)
|
| 285 |
+
elseif (${POWER_M} MATCHES "POWER9")
|
| 286 |
+
list(APPEND ARCH_FLAGS -mcpu=power9)
|
|
|
|
|
|
|
|
|
|
| 287 |
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
| 288 |
+
list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
|
| 289 |
else()
|
| 290 |
+
list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native)
|
|
|
|
| 291 |
endif()
|
| 292 |
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
|
| 293 |
message(STATUS "loongarch64 detected")
|