Spaces:
Sleeping
Sleeping
quantize : add support for K-quant types
Browse files
- bindings/ios +1 -1
- examples/common-ggml.cpp +16 -23
bindings/ios
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit c9d5095f0c64455b201f1cd0b547efcf093ee7c3
|
examples/common-ggml.cpp
CHANGED
|
@@ -9,6 +9,11 @@ static const std::map<std::string, enum ggml_ftype> GGML_FTYPE_MAP = {
|
|
| 9 |
{"q5_0", GGML_FTYPE_MOSTLY_Q5_0},
|
| 10 |
{"q5_1", GGML_FTYPE_MOSTLY_Q5_1},
|
| 11 |
{"q8_0", GGML_FTYPE_MOSTLY_Q8_0},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
};
|
| 13 |
|
| 14 |
void ggml_print_ftypes(FILE * fp) {
|
|
@@ -48,15 +53,15 @@ bool ggml_common_quantize_0(
|
|
| 48 |
case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break;
|
| 49 |
case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break;
|
| 50 |
case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
case GGML_FTYPE_UNKNOWN:
|
| 52 |
case GGML_FTYPE_ALL_F32:
|
| 53 |
case GGML_FTYPE_MOSTLY_F16:
|
| 54 |
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
|
| 55 |
-
case GGML_FTYPE_MOSTLY_Q2_K:
|
| 56 |
-
case GGML_FTYPE_MOSTLY_Q3_K:
|
| 57 |
-
case GGML_FTYPE_MOSTLY_Q4_K:
|
| 58 |
-
case GGML_FTYPE_MOSTLY_Q5_K:
|
| 59 |
-
case GGML_FTYPE_MOSTLY_Q6_K:
|
| 60 |
{
|
| 61 |
fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
|
| 62 |
return false;
|
|
@@ -167,24 +172,17 @@ bool ggml_common_quantize_0(
|
|
| 167 |
|
| 168 |
switch ((ggml_type) ttype) {
|
| 169 |
case GGML_TYPE_Q4_0:
|
| 170 |
-
{
|
| 171 |
-
cur_size = ggml_quantize_q4_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
|
| 172 |
-
} break;
|
| 173 |
case GGML_TYPE_Q4_1:
|
| 174 |
-
{
|
| 175 |
-
cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
|
| 176 |
-
} break;
|
| 177 |
case GGML_TYPE_Q5_0:
|
| 178 |
-
{
|
| 179 |
-
cur_size = ggml_quantize_q5_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
|
| 180 |
-
} break;
|
| 181 |
case GGML_TYPE_Q5_1:
|
| 182 |
-
{
|
| 183 |
-
cur_size = ggml_quantize_q5_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
|
| 184 |
-
} break;
|
| 185 |
case GGML_TYPE_Q8_0:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
{
|
| 187 |
-
cur_size = ggml_quantize_q8_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
|
| 188 |
} break;
|
| 189 |
case GGML_TYPE_F32:
|
| 190 |
case GGML_TYPE_F16:
|
|
@@ -192,11 +190,6 @@ bool ggml_common_quantize_0(
|
|
| 192 |
case GGML_TYPE_I16:
|
| 193 |
case GGML_TYPE_I32:
|
| 194 |
case GGML_TYPE_Q8_1:
|
| 195 |
-
case GGML_TYPE_Q2_K:
|
| 196 |
-
case GGML_TYPE_Q3_K:
|
| 197 |
-
case GGML_TYPE_Q4_K:
|
| 198 |
-
case GGML_TYPE_Q5_K:
|
| 199 |
-
case GGML_TYPE_Q6_K:
|
| 200 |
case GGML_TYPE_Q8_K:
|
| 201 |
case GGML_TYPE_COUNT:
|
| 202 |
{
|
|
|
|
| 9 |
{"q5_0", GGML_FTYPE_MOSTLY_Q5_0},
|
| 10 |
{"q5_1", GGML_FTYPE_MOSTLY_Q5_1},
|
| 11 |
{"q8_0", GGML_FTYPE_MOSTLY_Q8_0},
|
| 12 |
+
{"q2_k", GGML_FTYPE_MOSTLY_Q2_K},
|
| 13 |
+
{"q3_k", GGML_FTYPE_MOSTLY_Q3_K},
|
| 14 |
+
{"q4_k", GGML_FTYPE_MOSTLY_Q4_K},
|
| 15 |
+
{"q5_k", GGML_FTYPE_MOSTLY_Q5_K},
|
| 16 |
+
{"q6_k", GGML_FTYPE_MOSTLY_Q6_K},
|
| 17 |
};
|
| 18 |
|
| 19 |
void ggml_print_ftypes(FILE * fp) {
|
|
|
|
| 53 |
case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break;
|
| 54 |
case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break;
|
| 55 |
case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break;
|
| 56 |
+
case GGML_FTYPE_MOSTLY_Q2_K: qtype = GGML_TYPE_Q2_K; break;
|
| 57 |
+
case GGML_FTYPE_MOSTLY_Q3_K: qtype = GGML_TYPE_Q3_K; break;
|
| 58 |
+
case GGML_FTYPE_MOSTLY_Q4_K: qtype = GGML_TYPE_Q4_K; break;
|
| 59 |
+
case GGML_FTYPE_MOSTLY_Q5_K: qtype = GGML_TYPE_Q5_K; break;
|
| 60 |
+
case GGML_FTYPE_MOSTLY_Q6_K: qtype = GGML_TYPE_Q6_K; break;
|
| 61 |
case GGML_FTYPE_UNKNOWN:
|
| 62 |
case GGML_FTYPE_ALL_F32:
|
| 63 |
case GGML_FTYPE_MOSTLY_F16:
|
| 64 |
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
{
|
| 66 |
fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
|
| 67 |
return false;
|
|
|
|
| 172 |
|
| 173 |
switch ((ggml_type) ttype) {
|
| 174 |
case GGML_TYPE_Q4_0:
|
|
|
|
|
|
|
|
|
|
| 175 |
case GGML_TYPE_Q4_1:
|
|
|
|
|
|
|
|
|
|
| 176 |
case GGML_TYPE_Q5_0:
|
|
|
|
|
|
|
|
|
|
| 177 |
case GGML_TYPE_Q5_1:
|
|
|
|
|
|
|
|
|
|
| 178 |
case GGML_TYPE_Q8_0:
|
| 179 |
+
case GGML_TYPE_Q2_K:
|
| 180 |
+
case GGML_TYPE_Q3_K:
|
| 181 |
+
case GGML_TYPE_Q4_K:
|
| 182 |
+
case GGML_TYPE_Q5_K:
|
| 183 |
+
case GGML_TYPE_Q6_K:
|
| 184 |
{
|
| 185 |
+
cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements, hist_cur.data());
|
| 186 |
} break;
|
| 187 |
case GGML_TYPE_F32:
|
| 188 |
case GGML_TYPE_F16:
|
|
|
|
| 190 |
case GGML_TYPE_I16:
|
| 191 |
case GGML_TYPE_I32:
|
| 192 |
case GGML_TYPE_Q8_1:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
case GGML_TYPE_Q8_K:
|
| 194 |
case GGML_TYPE_COUNT:
|
| 195 |
{
|