Spaces:
Running
Running
metal : utilize max shared memory for mul_mat_id (llama/7935)
Browse files - ggml-metal.m +2 -1
ggml-metal.m
CHANGED
|
@@ -1862,9 +1862,10 @@ static enum ggml_status ggml_metal_graph_compute(
|
|
| 1862 |
// ne21 = n_rows
|
| 1863 |
const int dst_rows = ne20*ne21;
|
| 1864 |
const int dst_rows_min = n_as;
|
|
|
|
| 1865 |
|
| 1866 |
// max size of the rowids array in the kernel shared buffer
|
| 1867 |
-
GGML_ASSERT(dst_rows <= 2048);
|
| 1868 |
|
| 1869 |
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
|
| 1870 |
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
|
|
|
|
| 1862 |
// ne21 = n_rows
|
| 1863 |
const int dst_rows = ne20*ne21;
|
| 1864 |
const int dst_rows_min = n_as;
|
| 1865 |
+
const int dst_rows_max = (ctx->device.maxThreadgroupMemoryLength - 32 - 8192)/4;
|
| 1866 |
|
| 1867 |
// max size of the rowids array in the kernel shared buffer
|
| 1868 |
+
GGML_ASSERT(dst_rows <= dst_rows_max);
|
| 1869 |
|
| 1870 |
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
|
| 1871 |
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
|