Spaces:
Running
Running
slaren
committed on
Commit
·
71e001c
1
Parent(s):
d8fb433
add basic tensor data validation function (llama/6884)
Browse files
* add basic tensor data validation function
* add --check-tensors command line argument
tensor validation is disabled by default and can be enabled by adding
`--check-tensors` to the command line arguments.
quantize always validates tensors.
- ggml-quants.c +284 -0
- ggml.h +2 -0
ggml-quants.c
CHANGED
|
@@ -12389,3 +12389,287 @@ void quantize_row_iq2_s(const float * restrict x, void * restrict vy, int64_t k)
|
|
| 12389 |
block_iq2_s * restrict y = vy;
|
| 12390 |
quantize_row_iq2_s_reference(x, y, k);
|
| 12391 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12389 |
block_iq2_s * restrict y = vy;
|
| 12390 |
quantize_row_iq2_s_reference(x, y, k);
|
| 12391 |
}
|
// Check a single scalar for inf/nan, reporting the offending block index
// to stderr.
//
// Takes a double so that F64 rows can be validated without narrowing:
// a finite double larger than FLT_MAX would otherwise convert to inf and
// be reported as a false positive. float callers promote losslessly, and
// isinf/isnan are type-generic macros, so no other change is needed.
static bool validate_float(double f, size_t i) {
    if (isinf(f)) {
        fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
        return false;
    }

    if (isnan(f)) {
        fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
        return false;
    }

    return true;
}
| 12406 |
+
|
| 12407 |
+
static bool isinf_fp16(ggml_fp16_t f) {
|
| 12408 |
+
return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) == 0;
|
| 12409 |
+
}
|
| 12410 |
+
|
| 12411 |
+
static bool isnan_fp16(ggml_fp16_t f) {
|
| 12412 |
+
return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) != 0;
|
| 12413 |
+
}
|
| 12414 |
+
|
| 12415 |
+
static bool validate_fp16(ggml_fp16_t f, size_t i) {
|
| 12416 |
+
if (isinf_fp16(f)) {
|
| 12417 |
+
fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
|
| 12418 |
+
return false;
|
| 12419 |
+
}
|
| 12420 |
+
|
| 12421 |
+
if (isnan_fp16(f)) {
|
| 12422 |
+
fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
|
| 12423 |
+
return false;
|
| 12424 |
+
}
|
| 12425 |
+
|
| 12426 |
+
return true;
|
| 12427 |
+
}
|
// Validate the fp16 scale field `d` of every block in a row of `nb`
// quantized blocks of the given block `type`; bails out of the CALLING
// function with `return false` on the first bad value.
// NOTE: deliberately not wrapped in do { } while (0) — the macro both
// declares `q` in the enclosing scope and uses `return`, so it must be
// expanded as plain statements inside a function returning bool.
#define VALIDATE_ROW_DATA_D_F16_IMPL(type, data, nb) \
    const type * q = (const type *) (data); \
    for (size_t i = 0; i < (nb); ++i) { \
        if (!validate_fp16(q[i].d, i)) { \
            return false; \
        } \
    }
| 12436 |
+
|
| 12437 |
+
#define VALIDATE_ROW_DATA_DM_F16_IMPL(type, data, nb, d, m) \
|
| 12438 |
+
const type * q = (const type *) (data); \
|
| 12439 |
+
for (size_t i = 0; i < (nb); ++i) { \
|
| 12440 |
+
if (!validate_fp16(q[i].d, i) || !validate_fp16(q[i].m, i)) { \
|
| 12441 |
+
return false; \
|
| 12442 |
+
} \
|
| 12443 |
+
}
|
// Validate one row of tensor data: scan for inf/nan in the raw values
// (F16/F32/F64) or, for quantized types, in the per-block fp16/f32 scale
// fields. Integer types have nothing to validate and always pass.
//
// type   - ggml tensor type of the data
// data   - pointer to the raw row data
// nbytes - size of the row in bytes; must be a multiple of the type size
//
// Returns true when no inf/nan is found; on failure a diagnostic naming
// the offending element/block index is printed to stderr.
bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes) {
    if (type < 0 || type >= GGML_TYPE_COUNT) {
        fprintf(stderr, "%s: invalid type %d\n", __func__, type);
        return false;
    }

    if (nbytes % ggml_type_size(type) != 0) {
        fprintf(stderr, "%s: invalid size %zu for type %d\n", __func__, nbytes, type);
        return false;
    }

    // number of scalar elements (F16/F32/F64) or quantized blocks in the row
    const size_t nb = nbytes/ggml_type_size(type);

    switch (type) {
        case GGML_TYPE_F16:
            {
                const ggml_fp16_t * f = (const ggml_fp16_t *) data;
                size_t i = 0;
#if defined(__AVX2__)
                // fast path: 16 halves per iteration; an fp16 value is inf or
                // nan iff its exponent bits (0x7c00) are all set, so a scalar
                // rescan (which pins down the index and prints the message)
                // only runs when the vector compare reports a hit
                for (; i + 15 < nb; i += 16) {
                    __m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
                    __m256i vexp = _mm256_and_si256(v, _mm256_set1_epi16(0x7c00));
                    __m256i cmp = _mm256_cmpeq_epi16(vexp, _mm256_set1_epi16(0x7c00));
                    int mask = _mm256_movemask_epi8(cmp);
                    if (mask) {
                        for (size_t j = 0; j < 16; ++j) {
                            if (!validate_fp16(f[i + j], i + j)) {
                                return false;
                            }
                        }
                        // the vector compare found a bad lane, so the scalar
                        // rescan above must have returned false already
                        GGML_UNREACHABLE();
                    }
                }
#elif defined(__ARM_NEON)
                // same idea with 8 halves per iteration
                for (; i + 7 < nb; i += 8) {
                    uint16x8_t v = vld1q_u16(f + i);
                    uint16x8_t vexp = vandq_u16(v, vdupq_n_u16(0x7c00));
                    uint16x8_t cmp = vceqq_u16(vexp, vdupq_n_u16(0x7c00));
                    // narrow each 16-bit compare lane to 4 bits so the whole
                    // result fits in one 64-bit scalar mask
                    uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(cmp, 4)), 0);
                    if (mask) {
                        for (size_t j = 0; j < 8; ++j) {
                            if (!validate_fp16(f[i + j], i + j)) {
                                return false;
                            }
                        }
                        GGML_UNREACHABLE();
                    }
                }
#endif
                // scalar tail (and full row when no SIMD path is compiled in)
                for (; i < nb; ++i) {
                    if (!validate_fp16(f[i], i)) {
                        return false;
                    }
                }
            } break;
        case GGML_TYPE_F32:
            {
                const float * f = (const float *) data;
                size_t i = 0;
#if defined(__AVX2__)
                // a float is inf or nan iff its exponent bits (0x7f800000)
                // are all set; same scan-then-rescan structure as F16
                for (; i + 7 < nb; i += 8) {
                    __m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
                    __m256i vexp = _mm256_and_si256(v, _mm256_set1_epi32(0x7f800000));
                    __m256i cmp = _mm256_cmpeq_epi32(vexp, _mm256_set1_epi32(0x7f800000));
                    int mask = _mm256_movemask_epi8(cmp);
                    if (mask) {
                        for (size_t j = 0; j < 8; ++j) {
                            if (!validate_float(f[i + j], i + j)) {
                                return false;
                            }
                        }
                        GGML_UNREACHABLE();
                    }
                }
#elif defined(__ARM_NEON)
                for (; i + 3 < nb; i += 4) {
                    uint32x4_t v = vld1q_u32((const uint32_t *)f + i);
                    uint32x4_t vexp = vandq_u32(v, vdupq_n_u32(0x7f800000));
                    uint32x4_t cmp = vceqq_u32(vexp, vdupq_n_u32(0x7f800000));
                    uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(vshrn_n_u32(cmp, 8)), 0);
                    if (mask) {
                        for (size_t j = 0; j < 4; ++j) {
                            if (!validate_float(f[i + j], i + j)) {
                                return false;
                            }
                        }
                        GGML_UNREACHABLE();
                    }
                }
#endif
                for (; i < nb; ++i) {
                    if (!validate_float(f[i], i)) {
                        return false;
                    }
                }
            } break;
        case GGML_TYPE_F64:
            {
                const double * f = (const double *) data;
                // NOTE(review): values go through validate_float — verify its
                // parameter type accepts a double without narrowing, otherwise
                // finite doubles above FLT_MAX would be misreported as inf
                for (size_t i = 0; i < nb; ++i) {
                    if (!validate_float(f[i], i)) {
                        return false;
                    }
                }
            } break;
        // quantized types: only the per-block fp16 scale(s) can be non-finite
        case GGML_TYPE_Q4_0:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb);
            } break;
        case GGML_TYPE_Q4_1:
            {
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_1, data, nb, d, m);
            } break;
        case GGML_TYPE_Q5_0:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_0, data, nb);
            } break;
        case GGML_TYPE_Q5_1:
            {
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_1, data, nb, d, m);
            } break;
        case GGML_TYPE_Q8_0:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q8_0, data, nb);
            } break;
        case GGML_TYPE_Q2_K:
            {
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q2_K, data, nb, d, dmin);
            } break;
        case GGML_TYPE_Q3_K:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q3_K, data, nb);
            } break;
        case GGML_TYPE_Q4_K:
            {
                // block layout differs with QK_K == 64: two scales in d[]
#ifdef GGML_QKK_64
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d[0], d[1]);
#else
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d, dmin);
#endif
            } break;
        case GGML_TYPE_Q5_K:
            {
#ifdef GGML_QKK_64
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_K, data, nb);
#else
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_K, data, nb, d, dmin);
#endif
            } break;
        case GGML_TYPE_Q6_K:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q6_K, data, nb);
            } break;
        case GGML_TYPE_Q8_K:
            {
                // block_q8_K stores its scale as a float, not fp16
                const block_q8_K * q = (const block_q8_K *) data;
                for (size_t i = 0; i < nb; ++i) {
                    if (!validate_float(q[i].d, i)) {
                        return false;
                    }
                }
            } break;
        case GGML_TYPE_IQ1_S:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq1_s, data, nb);
            } break;
        case GGML_TYPE_IQ1_M:
            {
                const block_iq1_m * q = (const block_iq1_m *) data;
                for (size_t i = 0; i < nb; ++i) {
#if QK_K == 64
                    if (!validate_fp16(q[i].d, i)) {
                        return false;
                    }
#else
                    // the fp16 row scale is stored scattered across the high
                    // nibbles of the per-group scale words; reassemble it
                    // before checking
                    iq1m_scale_t scale;
                    const uint16_t * sc = (const uint16_t *)q[i].scales;
                    scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
                    if (!validate_fp16(scale.f16, i)) {
                        return false;
                    }
#endif
                }
            } break;
        case GGML_TYPE_IQ2_XXS:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xxs, data, nb);
            } break;
        case GGML_TYPE_IQ2_XS:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xs, data, nb);
            } break;
        case GGML_TYPE_IQ2_S:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_s, data, nb);
            } break;
        case GGML_TYPE_IQ3_XXS:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_xxs, data, nb);
            } break;

        case GGML_TYPE_IQ3_S:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_s, data, nb);
            } break;
        case GGML_TYPE_IQ4_XS:
            // when QK_K != 64 iq4_xs has its own block type; otherwise it
            // deliberately falls through to the iq4_nl handling below
#if QK_K != 64
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_xs, data, nb);
            } break;
#endif
            // with QK_K == 64, iq4_xs is iq4_nl
        case GGML_TYPE_IQ4_NL:
            {
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb);
            } break;
        case GGML_TYPE_I8:
        case GGML_TYPE_I16:
        case GGML_TYPE_I32:
        case GGML_TYPE_I64:
            // nothing to validate
            break;
        default:
            {
                fprintf(stderr, "%s: invalid type %d\n", __func__, type);
                return false;
            }
    }

    return true;
}
ggml.h
CHANGED
|
@@ -763,6 +763,8 @@ extern "C" {
|
|
| 763 |
// use this to compute the memory overhead of a tensor
|
| 764 |
GGML_API size_t ggml_tensor_overhead(void);
|
| 765 |
|
|
|
|
|
|
|
| 766 |
// main
|
| 767 |
|
| 768 |
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
|
|
|
|
| 763 |
// use this to compute the memory overhead of a tensor
|
| 764 |
GGML_API size_t ggml_tensor_overhead(void);
|
| 765 |
|
| 766 |
+
GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes);
|
| 767 |
+
|
| 768 |
// main
|
| 769 |
|
| 770 |
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
|