fitzsim committed on
Commit
239569b
·
unverified ·
1 Parent(s): cbcc851

whisper : PPC64 big-endian support (#398)

Browse files

* ggml : set cache line size to 128 on POWER9

* whisper : add PPC64 big endian support

Files changed (3) hide show
  1. Makefile +5 -1
  2. ggml.c +10 -3
  3. whisper.cpp +65 -0
Makefile CHANGED
@@ -115,11 +115,15 @@ endif
115
  ifeq ($(UNAME_M),amd64)
116
  CFLAGS += -mavx -mavx2 -mfma -mf16c
117
  endif
118
- ifeq ($(UNAME_M),ppc64le)
119
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
120
  ifneq (,$(findstring POWER9,$(POWER9_M)))
121
  CFLAGS += -mpower9-vector
122
  endif
 
 
 
 
123
  endif
124
  ifndef WHISPER_NO_ACCELERATE
125
  # Mac M1 - include Accelerate framework
 
115
  ifeq ($(UNAME_M),amd64)
116
  CFLAGS += -mavx -mavx2 -mfma -mf16c
117
  endif
118
+ ifneq ($(filter ppc64%,$(UNAME_M)),)
119
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
120
  ifneq (,$(findstring POWER9,$(POWER9_M)))
121
  CFLAGS += -mpower9-vector
122
  endif
123
+ # Require C++23's std::byteswap for big-endian support.
124
+ ifeq ($(UNAME_M),ppc64)
125
+ CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
126
+ endif
127
  endif
128
  ifndef WHISPER_NO_ACCELERATE
129
  # Mac M1 - include Accelerate framework
ggml.c CHANGED
@@ -339,8 +339,12 @@ int64_t ggml_cycles_per_ms(void) {
339
  #if defined(__cpp_lib_hardware_interference_size)
340
  #define CACHE_LINE_SIZE hardware_destructive_interference_size
341
  #else
 
 
 
342
  #define CACHE_LINE_SIZE 64
343
  #endif
 
344
 
345
  static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
346
 
@@ -609,9 +613,12 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
609
  #define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ? \
610
  vec_extract_fp32_from_shorth(vec_xl(0, p - GGML_F16_EPR)) : \
611
  vec_extract_fp32_from_shortl(vec_xl(0, p))
612
- #define GGML_F16_VEC_STORE(p, r, i) \
613
- if (i & 0x1) \
614
- vec_xst(vec_pack_to_short_fp32(r[i], r[i - 1]), 0, p - GGML_F16_EPR)
 
 
 
615
 
616
  #elif defined(__wasm_simd128__)
617
 
 
339
  #if defined(__cpp_lib_hardware_interference_size)
340
  #define CACHE_LINE_SIZE hardware_destructive_interference_size
341
  #else
342
+ #if defined(__POWER9_VECTOR__)
343
+ #define CACHE_LINE_SIZE 128
344
+ #else
345
  #define CACHE_LINE_SIZE 64
346
  #endif
347
+ #endif
348
 
349
  static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
350
 
 
613
  #define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ? \
614
  vec_extract_fp32_from_shorth(vec_xl(0, p - GGML_F16_EPR)) : \
615
  vec_extract_fp32_from_shortl(vec_xl(0, p))
616
+ #define GGML_ENDIAN_BYTE(i) ((unsigned char *)&(uint16_t){1})[i]
617
+ #define GGML_F16_VEC_STORE(p, r, i) \
618
+ if (i & 0x1) \
619
+ vec_xst(vec_pack_to_short_fp32(r[i - GGML_ENDIAN_BYTE(1)], \
620
+ r[i - GGML_ENDIAN_BYTE(0)]), \
621
+ 0, p - GGML_F16_EPR)
622
 
623
  #elif defined(__wasm_simd128__)
624
 
whisper.cpp CHANGED
@@ -17,6 +17,68 @@
17
  #include <regex>
18
  #include <random>
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  #define WHISPER_ASSERT(x) \
21
  do { \
22
  if (!(x)) { \
@@ -521,6 +583,7 @@ struct whisper_context {
521
  template<typename T>
522
  static void read_safe(whisper_model_loader * loader, T & dest) {
523
  loader->read(loader->context, &dest, sizeof(T));
 
524
  }
525
 
526
  static bool kv_cache_init(
@@ -733,6 +796,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
733
 
734
  filters.data.resize(filters.n_mel * filters.n_fft);
735
  loader->read(loader->context, filters.data.data(), filters.data.size() * sizeof(float));
 
736
  }
737
 
738
  // load vocab
@@ -1196,6 +1260,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
1196
  }
1197
 
1198
  loader->read(loader->context, tensor->data, ggml_nbytes(tensor));
 
1199
 
1200
  //printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
1201
  total_size += ggml_nbytes(tensor);
 
17
  #include <regex>
18
  #include <random>
19
 
20
+ #if defined(GGML_BIG_ENDIAN)
21
+ #include <bit>
22
+
23
+ template<typename T>
24
+ static T byteswap(T value) {
25
+ return std::byteswap(value);
26
+ }
27
+
28
+ template<>
29
+ float byteswap(float value) {
30
+ return std::bit_cast<float>(byteswap(std::bit_cast<std::uint32_t>(value)));
31
+ }
32
+
33
+ template<typename T>
34
+ static void byteswap_tensor_data(ggml_tensor * tensor) {
35
+ T * datum = reinterpret_cast<T *>(tensor->data);
36
+ for (int i = 0; i < ggml_nelements(tensor); i++) {
37
+ datum[i] = byteswap(datum[i]);
38
+ }
39
+ }
40
+
41
+ static void byteswap_tensor(ggml_tensor * tensor) {
42
+ switch (tensor->type) {
43
+ case GGML_TYPE_I16: {
44
+ byteswap_tensor_data<int16_t>(tensor);
45
+ break;
46
+ }
47
+ case GGML_TYPE_F16: {
48
+ byteswap_tensor_data<ggml_fp16_t>(tensor);
49
+ break;
50
+ }
51
+ case GGML_TYPE_I32: {
52
+ byteswap_tensor_data<int32_t>(tensor);
53
+ break;
54
+ }
55
+ case GGML_TYPE_F32: {
56
+ byteswap_tensor_data<float>(tensor);
57
+ break;
58
+ }
59
+ default: { // GGML_TYPE_I8
60
+ break;
61
+ }
62
+ }
63
+ }
64
+
65
+ #define BYTESWAP_VALUE(d) d = byteswap(d)
66
+ #define BYTESWAP_FILTERS(f) \
67
+ do { \
68
+ for (auto & datum : f.data) { \
69
+ datum = byteswap(datum); \
70
+ } \
71
+ } while (0)
72
+ #define BYTESWAP_TENSOR(t) \
73
+ do { \
74
+ byteswap_tensor(tensor); \
75
+ } while (0)
76
+ #else
77
+ #define BYTESWAP_VALUE(d) do {} while (0)
78
+ #define BYTESWAP_FILTERS(f) do {} while (0)
79
+ #define BYTESWAP_TENSOR(t) do {} while (0)
80
+ #endif
81
+
82
  #define WHISPER_ASSERT(x) \
83
  do { \
84
  if (!(x)) { \
 
583
  template<typename T>
584
  static void read_safe(whisper_model_loader * loader, T & dest) {
585
  loader->read(loader->context, &dest, sizeof(T));
586
+ BYTESWAP_VALUE(dest);
587
  }
588
 
589
  static bool kv_cache_init(
 
796
 
797
  filters.data.resize(filters.n_mel * filters.n_fft);
798
  loader->read(loader->context, filters.data.data(), filters.data.size() * sizeof(float));
799
+ BYTESWAP_FILTERS(filters);
800
  }
801
 
802
  // load vocab
 
1260
  }
1261
 
1262
  loader->read(loader->context, tensor->data, ggml_nbytes(tensor));
1263
+ BYTESWAP_TENSOR(tensor);
1264
 
1265
  //printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
1266
  total_size += ggml_nbytes(tensor);