Spaces:
Running
Running
llamafile: use member variable instead of constant for iq4nlt (llama/11780)
Browse files
ggml/src/ggml-cpu/llamafile/sgemm.cpp
CHANGED
|
@@ -280,14 +280,6 @@ template <> inline __m256bh load(const float *p) {
|
|
| 280 |
}
|
| 281 |
#endif
|
| 282 |
|
| 283 |
-
////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 284 |
-
// CONSTANTS
|
| 285 |
-
|
| 286 |
-
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
|
| 287 |
-
static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
|
| 288 |
-
static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
|
| 289 |
-
#endif
|
| 290 |
-
|
| 291 |
////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 292 |
// FLOATING POINT MATRIX MULTIPLICATION
|
| 293 |
|
|
@@ -614,6 +606,14 @@ class tinyBLAS_Q0_AVX {
|
|
| 614 |
TC *C, int64_t ldc,
|
| 615 |
int ith, int nth)
|
| 616 |
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
}
|
| 618 |
|
| 619 |
void matmul(int64_t m, int64_t n) {
|
|
@@ -1038,6 +1038,7 @@ class tinyBLAS_Q0_AVX {
|
|
| 1038 |
const int64_t ldc;
|
| 1039 |
const int ith;
|
| 1040 |
const int nth;
|
|
|
|
| 1041 |
};
|
| 1042 |
#endif // __AVX__
|
| 1043 |
|
|
|
|
| 280 |
}
|
| 281 |
#endif
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
////////////////////////////////////////////////////////////////////////////////////////////////////
|
| 284 |
// FLOATING POINT MATRIX MULTIPLICATION
|
| 285 |
|
|
|
|
| 606 |
TC *C, int64_t ldc,
|
| 607 |
int ith, int nth)
|
| 608 |
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
| 609 |
+
const int8_t kvalues_iq4nl[16] = {
|
| 610 |
+
-127, -104, -83, -65,
|
| 611 |
+
-49, -35, -22, -10,
|
| 612 |
+
1, 13, 25, 38,
|
| 613 |
+
53, 69, 89, 113
|
| 614 |
+
};
|
| 615 |
+
|
| 616 |
+
iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
|
| 617 |
}
|
| 618 |
|
| 619 |
void matmul(int64_t m, int64_t n) {
|
|
|
|
| 1038 |
const int64_t ldc;
|
| 1039 |
const int ith;
|
| 1040 |
const int nth;
|
| 1041 |
+
__m128i iq4nlt;
|
| 1042 |
};
|
| 1043 |
#endif // __AVX__
|
| 1044 |
|