fitzsim committed
whisper : PPC64 big-endian support (#398)
* ggml : set cache line size to 128 on POWER9
* whisper : add PPC64 big endian support
- Makefile +5 -1
- ggml.c +10 -3
- whisper.cpp +65 -0
Makefile
CHANGED

@@ -115,11 +115,15 @@ endif
 ifeq ($(UNAME_M),amd64)
     CFLAGS += -mavx -mavx2 -mfma -mf16c
 endif
-
+ifneq ($(filter ppc64%,$(UNAME_M)),)
     POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
     ifneq (,$(findstring POWER9,$(POWER9_M)))
         CFLAGS += -mpower9-vector
     endif
+    # Require c++23's std::byteswap for big-endian support.
+    ifeq ($(UNAME_M),ppc64)
+        CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+    endif
 endif
 ifndef WHISPER_NO_ACCELERATE
     # Mac M1 - include Accelerate framework
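For context (nothing below is from the patch): the extra CXXFLAGS apply only to the big-endian ppc64 triplet, while ppc64le keeps the default flags because its byte order already matches the model files. A minimal stand-alone C++ sketch of that assumption follows, using <bit> (std::endian is C++20, std::byteswap is the C++23 facility the Makefile comment refers to); the file and its checks are illustrative, not part of the build.

// Illustrative sketch only: check that -DGGML_BIG_ENDIAN (set by the Makefile
// for ppc64, not ppc64le) agrees with the target's actual byte order.
#include <bit>
#include <cstdint>

#if defined(GGML_BIG_ENDIAN)
static_assert(std::endian::native == std::endian::big,
              "GGML_BIG_ENDIAN is expected only on big-endian targets");
#endif

int main() {
    // std::byteswap is what requires -std=c++23 (in libstdc++ from GCC 12 on).
    static_assert(std::byteswap(std::uint16_t{0x1234}) == std::uint16_t{0x3412});
    return 0;
}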
ggml.c
CHANGED

@@ -339,8 +339,12 @@ int64_t ggml_cycles_per_ms(void) {
 #if defined(__cpp_lib_hardware_interference_size)
 #define CACHE_LINE_SIZE hardware_destructive_interference_size
 #else
+#if defined(__POWER9_VECTOR__)
+#define CACHE_LINE_SIZE 128
+#else
 #define CACHE_LINE_SIZE 64
 #endif
+#endif
 
 static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
 
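For context (not part of the commit): POWER9 cores use 128-byte cache lines, so keeping the 64-byte default would let data that ggml deliberately spaces apart (in units of CACHE_LINE_SIZE_F32 floats per thread) end up sharing a line. The sketch below is a generic, stand-alone C++ illustration of that false-sharing concern; padded_counter and the thread loop are hypothetical and not taken from ggml.

// Illustrative sketch only: pad per-thread data to the cache-line size so two
// threads never write into the same line (false sharing).
#include <thread>

#if defined(__POWER9_VECTOR__)
#define CACHE_LINE_SIZE 128   // POWER9 cache line, as in the patch
#else
#define CACHE_LINE_SIZE 64
#endif

struct alignas(CACHE_LINE_SIZE) padded_counter {
    long value = 0;           // sizeof(padded_counter) is padded up to CACHE_LINE_SIZE
};

int main() {
    padded_counter counters[2];   // each element occupies its own cache line

    std::thread a([&] { for (int i = 0; i < 1000000; i++) counters[0].value++; });
    std::thread b([&] { for (int i = 0; i < 1000000; i++) counters[1].value++; });
    a.join();
    b.join();
    return counters[0].value == counters[1].value ? 0 : 1;
}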
@@ -609,9 +613,12 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
 #define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ?                   \
   vec_extract_fp32_from_shorth(vec_xl(0, p - GGML_F16_EPR)) : \
   vec_extract_fp32_from_shortl(vec_xl(0, p))
-#define
-
-
+#define GGML_ENDIAN_BYTE(i) ((unsigned char *)&(uint16_t){1})[i]
+#define GGML_F16_VEC_STORE(p, r, i)                             \
+  if (i & 0x1)                                                  \
+    vec_xst(vec_pack_to_short_fp32(r[i - GGML_ENDIAN_BYTE(1)],  \
+                                   r[i - GGML_ENDIAN_BYTE(0)]), \
+                                   0, p - GGML_F16_EPR)
 
 #elif defined(__wasm_simd128__)
 
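For context (not part of the commit): GGML_ENDIAN_BYTE(i) reads the i-th byte of the 16-bit constant 1, which lets the new GGML_F16_VEC_STORE decide at a glance, without an #ifdef, which of the two accumulator vectors to pack first. The stand-alone C++ sketch below shows the same trick; the C compound literal from the macro is replaced by a memcpy, and endian_byte is a hypothetical name.

// Illustrative sketch only: detect byte order by inspecting the in-memory
// bytes of the 16-bit value 1 (little endian: 1 0, big endian: 0 1).
#include <cstdint>
#include <cstring>
#include <cstdio>

static int endian_byte(int i) {
    const std::uint16_t one = 1;
    unsigned char bytes[2];
    std::memcpy(bytes, &one, sizeof(one));   // look at the stored byte layout
    return bytes[i];
}

int main() {
    // Prints "1 0" on little-endian hosts and "0 1" on big-endian hosts.
    std::printf("%d %d\n", endian_byte(0), endian_byte(1));
    return 0;
}

Plugged into r[i - GGML_ENDIAN_BYTE(1)] and r[i - GGML_ENDIAN_BYTE(0)], the packed pair becomes r[i], r[i - 1] on little-endian and r[i - 1], r[i] on big-endian, so the two vectors are packed in the correct order on either byte order.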
whisper.cpp
CHANGED

@@ -17,6 +17,68 @@
 #include <regex>
 #include <random>
 
+#if defined(GGML_BIG_ENDIAN)
+#include <bit>
+
+template<typename T>
+static T byteswap(T value) {
+    return std::byteswap(value);
+}
+
+template<>
+float byteswap(float value) {
+    return std::bit_cast<float>(byteswap(std::bit_cast<std::uint32_t>(value)));
+}
+
+template<typename T>
+static void byteswap_tensor_data(ggml_tensor * tensor) {
+    T * datum = reinterpret_cast<T *>(tensor->data);
+    for (int i = 0; i < ggml_nelements(tensor); i++) {
+        datum[i] = byteswap(datum[i]);
+    }
+}
+
+static void byteswap_tensor(ggml_tensor * tensor) {
+    switch (tensor->type) {
+        case GGML_TYPE_I16: {
+            byteswap_tensor_data<int16_t>(tensor);
+            break;
+        }
+        case GGML_TYPE_F16: {
+            byteswap_tensor_data<ggml_fp16_t>(tensor);
+            break;
+        }
+        case GGML_TYPE_I32: {
+            byteswap_tensor_data<int32_t>(tensor);
+            break;
+        }
+        case GGML_TYPE_F32: {
+            byteswap_tensor_data<float>(tensor);
+            break;
+        }
+        default: { // GML_TYPE_I8
+            break;
+        }
+    }
+}
+
+#define BYTESWAP_VALUE(d) d = byteswap(d)
+#define BYTESWAP_FILTERS(f)           \
+    do {                              \
+        for (auto & datum : f.data) { \
+            datum = byteswap(datum);  \
+        }                             \
+    } while (0)
+#define BYTESWAP_TENSOR(t)       \
+    do {                         \
+        byteswap_tensor(tensor); \
+    } while (0)
+#else
+#define BYTESWAP_VALUE(d) do {} while (0)
+#define BYTESWAP_FILTERS(f) do {} while (0)
+#define BYTESWAP_TENSOR(t) do {} while (0)
+#endif
+
 #define WHISPER_ASSERT(x) \
     do { \
         if (!(x)) { \
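For context (not part of the commit): the float specialisation above cannot swap its bytes directly, so it round-trips through a 32-bit integer with std::bit_cast (C++20) and std::byteswap (C++23). A minimal stand-alone sketch of that path follows; byteswap_float and the printed values are illustrative only.

// Illustrative sketch only: byte-swap a float by reinterpreting its bits as a
// 32-bit integer, swapping, and reinterpreting back.
#include <bit>
#include <cstdint>
#include <cstdio>

static float byteswap_float(float value) {
    return std::bit_cast<float>(std::byteswap(std::bit_cast<std::uint32_t>(value)));
}

int main() {
    const float x = 1.0f;                       // stored as bits 0x3f800000
    const float once  = byteswap_float(x);
    const float twice = byteswap_float(once);   // swapping twice is the identity

    std::printf("%08x -> %08x -> %08x\n",
                (unsigned) std::bit_cast<std::uint32_t>(x),
                (unsigned) std::bit_cast<std::uint32_t>(once),
                (unsigned) std::bit_cast<std::uint32_t>(twice));
    return 0;
}

Because the swap is its own inverse, applying it to every value read from the little-endian model file is enough to end up with native-endian data on a big-endian host.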
@@ -521,6 +583,7 @@ struct whisper_context {
 template<typename T>
 static void read_safe(whisper_model_loader * loader, T & dest) {
     loader->read(loader->context, &dest, sizeof(T));
+    BYTESWAP_VALUE(dest);
 }
 
 static bool kv_cache_init(
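For context (not part of the commit): the ggml model file stores multi-byte scalars little-endian, so on a big-endian host read_safe() has to swap each value it reads; that is all BYTESWAP_VALUE(dest) adds. The stand-alone sketch below walks through the "ggml" magic number 0x67676d6c; the on_disk array and the memcpy stand in for loader->read() and are illustrative only.

// Illustrative sketch only: reading a little-endian scalar on a big-endian host.
#include <bit>
#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
    const unsigned char on_disk[4] = {0x6c, 0x6d, 0x67, 0x67};   // 0x67676d6c, little-endian

    std::uint32_t magic;
    std::memcpy(&magic, on_disk, sizeof(magic));     // raw read, like loader->read()

    if constexpr (std::endian::native == std::endian::big) {
        magic = std::byteswap(magic);                // the BYTESWAP_VALUE(dest) step
    }

    std::printf("magic = 0x%08x\n", (unsigned) magic);   // 0x67676d6c on either host
    return 0;
}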
@@ -733,6 +796,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
 
         filters.data.resize(filters.n_mel * filters.n_fft);
         loader->read(loader->context, filters.data.data(), filters.data.size() * sizeof(float));
+        BYTESWAP_FILTERS(filters);
     }
 
     // load vocab
@@ -1196,6 +1260,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
             }
 
             loader->read(loader->context, tensor->data, ggml_nbytes(tensor));
+            BYTESWAP_TENSOR(tensor);
 
             //printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
             total_size += ggml_nbytes(tensor);