ggerganov committed on
Commit
91ed6bd
·
unverified ·
1 Parent(s): 97246e8

quantize : add support for K-quant types

Browse files
Files changed (2) hide show
  1. bindings/ios +1 -1
  2. examples/common-ggml.cpp +16 -23
bindings/ios CHANGED
@@ -1 +1 @@
1
- Subproject commit b5a163decd5290a99806957905639c4456de97f5
 
1
+ Subproject commit c9d5095f0c64455b201f1cd0b547efcf093ee7c3
examples/common-ggml.cpp CHANGED
@@ -9,6 +9,11 @@ static const std::map<std::string, enum ggml_ftype> GGML_FTYPE_MAP = {
9
  {"q5_0", GGML_FTYPE_MOSTLY_Q5_0},
10
  {"q5_1", GGML_FTYPE_MOSTLY_Q5_1},
11
  {"q8_0", GGML_FTYPE_MOSTLY_Q8_0},
 
 
 
 
 
12
  };
13
 
14
  void ggml_print_ftypes(FILE * fp) {
@@ -48,15 +53,15 @@ bool ggml_common_quantize_0(
48
  case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break;
49
  case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break;
50
  case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break;
 
 
 
 
 
51
  case GGML_FTYPE_UNKNOWN:
52
  case GGML_FTYPE_ALL_F32:
53
  case GGML_FTYPE_MOSTLY_F16:
54
  case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
55
- case GGML_FTYPE_MOSTLY_Q2_K:
56
- case GGML_FTYPE_MOSTLY_Q3_K:
57
- case GGML_FTYPE_MOSTLY_Q4_K:
58
- case GGML_FTYPE_MOSTLY_Q5_K:
59
- case GGML_FTYPE_MOSTLY_Q6_K:
60
  {
61
  fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
62
  return false;
@@ -167,24 +172,17 @@ bool ggml_common_quantize_0(
167
 
168
  switch ((ggml_type) ttype) {
169
  case GGML_TYPE_Q4_0:
170
- {
171
- cur_size = ggml_quantize_q4_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
172
- } break;
173
  case GGML_TYPE_Q4_1:
174
- {
175
- cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
176
- } break;
177
  case GGML_TYPE_Q5_0:
178
- {
179
- cur_size = ggml_quantize_q5_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
180
- } break;
181
  case GGML_TYPE_Q5_1:
182
- {
183
- cur_size = ggml_quantize_q5_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
184
- } break;
185
  case GGML_TYPE_Q8_0:
 
 
 
 
 
186
  {
187
- cur_size = ggml_quantize_q8_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
188
  } break;
189
  case GGML_TYPE_F32:
190
  case GGML_TYPE_F16:
@@ -192,11 +190,6 @@ bool ggml_common_quantize_0(
192
  case GGML_TYPE_I16:
193
  case GGML_TYPE_I32:
194
  case GGML_TYPE_Q8_1:
195
- case GGML_TYPE_Q2_K:
196
- case GGML_TYPE_Q3_K:
197
- case GGML_TYPE_Q4_K:
198
- case GGML_TYPE_Q5_K:
199
- case GGML_TYPE_Q6_K:
200
  case GGML_TYPE_Q8_K:
201
  case GGML_TYPE_COUNT:
202
  {
 
9
  {"q5_0", GGML_FTYPE_MOSTLY_Q5_0},
10
  {"q5_1", GGML_FTYPE_MOSTLY_Q5_1},
11
  {"q8_0", GGML_FTYPE_MOSTLY_Q8_0},
12
+ {"q2_k", GGML_FTYPE_MOSTLY_Q2_K},
13
+ {"q3_k", GGML_FTYPE_MOSTLY_Q3_K},
14
+ {"q4_k", GGML_FTYPE_MOSTLY_Q4_K},
15
+ {"q5_k", GGML_FTYPE_MOSTLY_Q5_K},
16
+ {"q6_k", GGML_FTYPE_MOSTLY_Q6_K},
17
  };
18
 
19
  void ggml_print_ftypes(FILE * fp) {
 
53
  case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break;
54
  case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break;
55
  case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break;
56
+ case GGML_FTYPE_MOSTLY_Q2_K: qtype = GGML_TYPE_Q2_K; break;
57
+ case GGML_FTYPE_MOSTLY_Q3_K: qtype = GGML_TYPE_Q3_K; break;
58
+ case GGML_FTYPE_MOSTLY_Q4_K: qtype = GGML_TYPE_Q4_K; break;
59
+ case GGML_FTYPE_MOSTLY_Q5_K: qtype = GGML_TYPE_Q5_K; break;
60
+ case GGML_FTYPE_MOSTLY_Q6_K: qtype = GGML_TYPE_Q6_K; break;
61
  case GGML_FTYPE_UNKNOWN:
62
  case GGML_FTYPE_ALL_F32:
63
  case GGML_FTYPE_MOSTLY_F16:
64
  case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
 
 
 
 
 
65
  {
66
  fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
67
  return false;
 
172
 
173
  switch ((ggml_type) ttype) {
174
  case GGML_TYPE_Q4_0:
 
 
 
175
  case GGML_TYPE_Q4_1:
 
 
 
176
  case GGML_TYPE_Q5_0:
 
 
 
177
  case GGML_TYPE_Q5_1:
 
 
 
178
  case GGML_TYPE_Q8_0:
179
+ case GGML_TYPE_Q2_K:
180
+ case GGML_TYPE_Q3_K:
181
+ case GGML_TYPE_Q4_K:
182
+ case GGML_TYPE_Q5_K:
183
+ case GGML_TYPE_Q6_K:
184
  {
185
+ cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements, hist_cur.data());
186
  } break;
187
  case GGML_TYPE_F32:
188
  case GGML_TYPE_F16:
 
190
  case GGML_TYPE_I16:
191
  case GGML_TYPE_I32:
192
  case GGML_TYPE_Q8_1:
 
 
 
 
 
193
  case GGML_TYPE_Q8_K:
194
  case GGML_TYPE_COUNT:
195
  {