jmorganca committed
Commit 0cb2d04 · 1 Parent(s): ff2d3eb

llamafile: use member variable instead of constant for iq4nlt (llama/11780)

ggml/src/ggml-cpu/llamafile/sgemm.cpp CHANGED
@@ -280,14 +280,6 @@ template <> inline __m256bh load(const float *p) {
 }
 #endif
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// CONSTANTS
-
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
-#endif
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FLOATING POINT MATRIX MULTIPLICATION
 
@@ -614,6 +606,14 @@ class tinyBLAS_Q0_AVX {
                     TC *C, int64_t ldc,
                     int ith, int nth)
         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
+        const int8_t kvalues_iq4nl[16] = {
+            -127, -104, -83, -65,
+            -49, -35, -22, -10,
+            1, 13, 25, 38,
+            53, 69, 89, 113
+        };
+
+        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
     }
 
     void matmul(int64_t m, int64_t n) {
@@ -1038,6 +1038,7 @@ class tinyBLAS_Q0_AVX {
     const int64_t ldc;
     const int ith;
     const int nth;
+    __m128i iq4nlt;
 };
 #endif // __AVX__
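For context, a minimal self-contained sketch (assumed usage, not taken from this commit) of how a 16-entry signed lookup table like kvalues_iq4nl is typically consumed once loaded into an __m128i: _mm_shuffle_epi8 uses the low nibble of each byte of the index register to select a table entry, which is how packed 4-bit quantized indices are expanded to int8 values. All names here are illustrative only.

// sketch.cpp — build with e.g. g++ -mssse3 sketch.cpp (requires SSSE3)
#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
    // Same table as in the diff; kept local, mirroring the member-variable approach.
    const int8_t kvalues_iq4nl[16] = {
        -127, -104, -83, -65, -49, -35, -22, -10,
           1,   13,  25,  38,  53,  69,  89, 113
    };
    const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);

    // 16 example 4-bit indices, one per byte (only the low nibble is used by pshufb).
    uint8_t idx[16];
    for (int i = 0; i < 16; ++i) idx[i] = (uint8_t) i;
    const __m128i indices = _mm_loadu_si128((const __m128i *) idx);

    // Each output byte becomes kvalues_iq4nl[idx[i] & 0x0F].
    const __m128i vals = _mm_shuffle_epi8(iq4nlt, indices);

    int8_t out[16];
    _mm_storeu_si128((__m128i *) out, vals);
    for (int i = 0; i < 16; ++i) printf("%d ", out[i]);
    printf("\n");
    return 0;
}

With the identity indices above, the program simply prints the table back (-127 -104 ... 113), which is an easy way to sanity-check the load-and-shuffle pattern.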