Spaces:
Sleeping
Sleeping
ggml-cpu : "align corners" for bilinear upscale/downscale (ggml/1285)
* add "align corners" mode for bilinear upscale, and allow downscaling
* add ggml_interpolate, deprecate ggml_upscale_ext, pass in align-corners as bit-flag
* test-backend-ops: replace ggml_upscale_ext with ggml_interpolate, add test cases for downscale and align-corners
- ggml/include/ggml.h +20 -2
- ggml/src/ggml-cpu/ops.cpp +12 -7
- ggml/src/ggml.c +24 -15
ggml/include/ggml.h
CHANGED
|
@@ -1765,6 +1765,12 @@ extern "C" {
|
|
| 1765 |
enum ggml_scale_mode {
|
| 1766 |
GGML_SCALE_MODE_NEAREST = 0,
|
| 1767 |
GGML_SCALE_MODE_BILINEAR = 1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1768 |
};
|
| 1769 |
|
| 1770 |
// interpolate
|
|
@@ -1777,14 +1783,26 @@ extern "C" {
|
|
| 1777 |
|
| 1778 |
// interpolate
|
| 1779 |
// interpolate scale to specified dimensions
|
| 1780 |
-
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
| 1781 |
struct ggml_context * ctx,
|
| 1782 |
struct ggml_tensor * a,
|
| 1783 |
int ne0,
|
| 1784 |
int ne1,
|
| 1785 |
int ne2,
|
| 1786 |
int ne3,
|
| 1787 |
-
enum ggml_scale_mode mode)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1788 |
|
| 1789 |
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
| 1790 |
GGML_API struct ggml_tensor * ggml_pad(
|
|
|
|
| 1765 |
enum ggml_scale_mode {
|
| 1766 |
GGML_SCALE_MODE_NEAREST = 0,
|
| 1767 |
GGML_SCALE_MODE_BILINEAR = 1,
|
| 1768 |
+
|
| 1769 |
+
GGML_SCALE_MODE_COUNT
|
| 1770 |
+
};
|
| 1771 |
+
|
| 1772 |
+
enum ggml_scale_flag {
|
| 1773 |
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
| 1774 |
};
|
| 1775 |
|
| 1776 |
// interpolate
|
|
|
|
| 1783 |
|
| 1784 |
// interpolate
|
| 1785 |
// interpolate scale to specified dimensions
|
| 1786 |
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
|
| 1787 |
struct ggml_context * ctx,
|
| 1788 |
struct ggml_tensor * a,
|
| 1789 |
int ne0,
|
| 1790 |
int ne1,
|
| 1791 |
int ne2,
|
| 1792 |
int ne3,
|
| 1793 |
+
enum ggml_scale_mode mode),
|
| 1794 |
+
"use ggml_interpolate instead");
|
| 1795 |
+
|
| 1796 |
+
// Up- or downsamples the input to the specified size.
|
| 1797 |
+
// 2D scale modes (eg. bilinear) are applied to the first two dimensions.
|
| 1798 |
+
GGML_API struct ggml_tensor * ggml_interpolate(
|
| 1799 |
+
struct ggml_context * ctx,
|
| 1800 |
+
struct ggml_tensor * a,
|
| 1801 |
+
int64_t ne0,
|
| 1802 |
+
int64_t ne1,
|
| 1803 |
+
int64_t ne2,
|
| 1804 |
+
int64_t ne3,
|
| 1805 |
+
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
|
| 1806 |
|
| 1807 |
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
| 1808 |
GGML_API struct ggml_tensor * ggml_pad(
|
ggml/src/ggml-cpu/ops.cpp
CHANGED
|
@@ -6608,12 +6608,13 @@ static void ggml_compute_forward_upscale_f32(
|
|
| 6608 |
|
| 6609 |
GGML_TENSOR_UNARY_OP_LOCALS
|
| 6610 |
|
| 6611 |
-
|
| 6612 |
-
|
| 6613 |
-
|
| 6614 |
-
|
| 6615 |
|
| 6616 |
-
const
|
|
|
|
| 6617 |
|
| 6618 |
if (mode == GGML_SCALE_MODE_NEAREST) {
|
| 6619 |
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
|
@@ -6634,8 +6635,12 @@ static void ggml_compute_forward_upscale_f32(
|
|
| 6634 |
}
|
| 6635 |
}
|
| 6636 |
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
|
| 6637 |
-
|
| 6638 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6639 |
|
| 6640 |
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
| 6641 |
const int64_t i03 = i3 / sf3;
|
|
|
|
| 6608 |
|
| 6609 |
GGML_TENSOR_UNARY_OP_LOCALS
|
| 6610 |
|
| 6611 |
+
float sf0 = (float)ne0/src0->ne[0];
|
| 6612 |
+
float sf1 = (float)ne1/src0->ne[1];
|
| 6613 |
+
float sf2 = (float)ne2/src0->ne[2];
|
| 6614 |
+
float sf3 = (float)ne3/src0->ne[3];
|
| 6615 |
|
| 6616 |
+
const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
|
| 6617 |
+
const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
|
| 6618 |
|
| 6619 |
if (mode == GGML_SCALE_MODE_NEAREST) {
|
| 6620 |
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
|
|
|
| 6635 |
}
|
| 6636 |
}
|
| 6637 |
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
|
| 6638 |
+
float pixel_offset = 0.5f;
|
| 6639 |
+
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
|
| 6640 |
+
pixel_offset = 0.0f;
|
| 6641 |
+
sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
|
| 6642 |
+
sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
|
| 6643 |
+
}
|
| 6644 |
|
| 6645 |
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
| 6646 |
const int64_t i03 = i3 / sf3;
|
ggml/src/ggml.c
CHANGED
|
@@ -4247,24 +4247,21 @@ struct ggml_tensor * ggml_pool_2d_back(
|
|
| 4247 |
return result;
|
| 4248 |
}
|
| 4249 |
|
| 4250 |
-
// ggml_upscale
|
| 4251 |
|
| 4252 |
-
static struct ggml_tensor *
|
| 4253 |
struct ggml_context * ctx,
|
| 4254 |
struct ggml_tensor * a,
|
| 4255 |
-
|
| 4256 |
-
|
| 4257 |
-
|
| 4258 |
-
|
| 4259 |
-
|
| 4260 |
-
GGML_ASSERT(
|
| 4261 |
-
|
| 4262 |
-
GGML_ASSERT(a->ne[2] <= ne2);
|
| 4263 |
-
GGML_ASSERT(a->ne[3] <= ne3);
|
| 4264 |
-
|
| 4265 |
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
| 4266 |
|
| 4267 |
-
ggml_set_op_params_i32(result, 0, mode);
|
| 4268 |
|
| 4269 |
result->op = GGML_OP_UPSCALE;
|
| 4270 |
result->src[0] = a;
|
|
@@ -4277,7 +4274,8 @@ struct ggml_tensor * ggml_upscale(
|
|
| 4277 |
struct ggml_tensor * a,
|
| 4278 |
int scale_factor,
|
| 4279 |
enum ggml_scale_mode mode) {
|
| 4280 |
-
|
|
|
|
| 4281 |
}
|
| 4282 |
|
| 4283 |
struct ggml_tensor * ggml_upscale_ext(
|
|
@@ -4288,7 +4286,18 @@ struct ggml_tensor * ggml_upscale_ext(
|
|
| 4288 |
int ne2,
|
| 4289 |
int ne3,
|
| 4290 |
enum ggml_scale_mode mode) {
|
| 4291 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4292 |
}
|
| 4293 |
|
| 4294 |
// ggml_pad
|
|
|
|
| 4247 |
return result;
|
| 4248 |
}
|
| 4249 |
|
| 4250 |
+
// ggml_upscale / ggml_interpolate
|
| 4251 |
|
| 4252 |
+
static struct ggml_tensor * ggml_interpolate_impl(
|
| 4253 |
struct ggml_context * ctx,
|
| 4254 |
struct ggml_tensor * a,
|
| 4255 |
+
int64_t ne0,
|
| 4256 |
+
int64_t ne1,
|
| 4257 |
+
int64_t ne2,
|
| 4258 |
+
int64_t ne3,
|
| 4259 |
+
uint32_t mode) {
|
| 4260 |
+
GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
|
| 4261 |
+
|
|
|
|
|
|
|
|
|
|
| 4262 |
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
| 4263 |
|
| 4264 |
+
ggml_set_op_params_i32(result, 0, (int32_t)mode);
|
| 4265 |
|
| 4266 |
result->op = GGML_OP_UPSCALE;
|
| 4267 |
result->src[0] = a;
|
|
|
|
| 4274 |
struct ggml_tensor * a,
|
| 4275 |
int scale_factor,
|
| 4276 |
enum ggml_scale_mode mode) {
|
| 4277 |
+
GGML_ASSERT(scale_factor > 1);
|
| 4278 |
+
return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
|
| 4279 |
}
|
| 4280 |
|
| 4281 |
struct ggml_tensor * ggml_upscale_ext(
|
|
|
|
| 4286 |
int ne2,
|
| 4287 |
int ne3,
|
| 4288 |
enum ggml_scale_mode mode) {
|
| 4289 |
+
return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
|
| 4290 |
+
}
|
| 4291 |
+
|
| 4292 |
+
struct ggml_tensor * ggml_interpolate(
|
| 4293 |
+
struct ggml_context * ctx,
|
| 4294 |
+
struct ggml_tensor * a,
|
| 4295 |
+
int64_t ne0,
|
| 4296 |
+
int64_t ne1,
|
| 4297 |
+
int64_t ne2,
|
| 4298 |
+
int64_t ne3,
|
| 4299 |
+
uint32_t mode) {
|
| 4300 |
+
return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
|
| 4301 |
}
|
| 4302 |
|
| 4303 |
// ggml_pad
|