ggerganov committed on
Commit
d4b3604
·
1 Parent(s): 56e6751

metal : utilize max shared memory for mul_mat_id (llama/7935)

Browse files
Files changed (1) hide show
  1. ggml-metal.m +2 -1
ggml-metal.m CHANGED
@@ -1862,9 +1862,10 @@ static enum ggml_status ggml_metal_graph_compute(
1862
  // ne21 = n_rows
1863
  const int dst_rows = ne20*ne21;
1864
  const int dst_rows_min = n_as;
 
1865
 
1866
  // max size of the rowids array in the kernel shared buffer
1867
- GGML_ASSERT(dst_rows <= 2048);
1868
 
1869
  // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
1870
  // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
 
1862
  // ne21 = n_rows
1863
  const int dst_rows = ne20*ne21;
1864
  const int dst_rows_min = n_as;
1865
+ const int dst_rows_max = (ctx->device.maxThreadgroupMemoryLength - 32 - 8192)/4;
1866
 
1867
  // max size of the rowids array in the kernel shared buffer
1868
+ GGML_ASSERT(dst_rows <= dst_rows_max);
1869
 
1870
  // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
1871
  // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel