Spaces:
Running
Running
repo : update links to new url (llama/11886)
Browse files
* repo : update links to new url
ggml-ci
* cont : more urls
ggml-ci
ggml/include/ggml-cpu.h
CHANGED
|
@@ -8,7 +8,7 @@ extern "C" {
|
|
| 8 |
#endif
|
| 9 |
|
| 10 |
// the compute plan that needs to be prepared for ggml_graph_compute()
|
| 11 |
-
// since https://github.com/ggerganov/ggml/issues/287
|
| 12 |
struct ggml_cplan {
|
| 13 |
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
|
| 14 |
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
|
|
|
|
| 8 |
#endif
|
| 9 |
|
| 10 |
// the compute plan that needs to be prepared for ggml_graph_compute()
|
| 11 |
+
// since https://github.com/ggml-org/ggml/issues/287
|
| 12 |
struct ggml_cplan {
|
| 13 |
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
|
| 14 |
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
|
ggml/include/ggml-metal.h
CHANGED
|
@@ -45,7 +45,7 @@ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
|
| 45 |
|
| 46 |
GGML_DEPRECATED(
|
| 47 |
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
|
| 48 |
-
"obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
|
| 49 |
|
| 50 |
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
|
| 51 |
|
|
|
|
| 45 |
|
| 46 |
GGML_DEPRECATED(
|
| 47 |
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
|
| 48 |
+
"obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
|
| 49 |
|
| 50 |
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
|
| 51 |
|
ggml/src/ggml-cpu/ggml-cpu.c
CHANGED
|
@@ -1816,7 +1816,7 @@ inline static float ggml_silu_f32(float x) {
|
|
| 1816 |
|
| 1817 |
#if __FINITE_MATH_ONLY__
|
| 1818 |
#error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"
|
| 1819 |
-
#error "ref: https://github.com/ggerganov/llama.cpp/pull/7154#issuecomment-2143844461"
|
| 1820 |
#endif
|
| 1821 |
|
| 1822 |
#if defined(__ARM_NEON) && defined(__aarch64__)
|
|
@@ -7574,7 +7574,7 @@ UseGgmlGemm2:;
|
|
| 7574 |
int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size;
|
| 7575 |
|
| 7576 |
// If the chunking is poor for the number of threads on this setup, scrap the whole plan. Re-chunk it by thread.
|
| 7577 |
-
// Also, chunking by thread was measured to have perform better on NUMA systems. See https://github.com/ggerganov/llama.cpp/pull/6915
|
| 7578 |
// In theory, chunking should be just as useful on NUMA and non NUMA systems, but testing disagreed with that.
|
| 7579 |
if (nchunk0 * nchunk1 < nth * 4 || ggml_is_numa()) {
|
| 7580 |
// distribute the thread work across the inner or outer loop based on which one is larger
|
|
|
|
| 1816 |
|
| 1817 |
#if __FINITE_MATH_ONLY__
|
| 1818 |
#error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"
|
| 1819 |
+
#error "ref: https://github.com/ggml-org/llama.cpp/pull/7154#issuecomment-2143844461"
|
| 1820 |
#endif
|
| 1821 |
|
| 1822 |
#if defined(__ARM_NEON) && defined(__aarch64__)
|
|
|
|
| 7574 |
int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size;
|
| 7575 |
|
| 7576 |
// If the chunking is poor for the number of threads on this setup, scrap the whole plan. Re-chunk it by thread.
|
| 7577 |
+
// Also, chunking by thread was measured to have perform better on NUMA systems. See https://github.com/ggml-org/llama.cpp/pull/6915
|
| 7578 |
// In theory, chunking should be just as useful on NUMA and non NUMA systems, but testing disagreed with that.
|
| 7579 |
if (nchunk0 * nchunk1 < nth * 4 || ggml_is_numa()) {
|
| 7580 |
// distribute the thread work across the inner or outer loop based on which one is larger
|
ggml/src/ggml-metal/ggml-metal.m
CHANGED
|
@@ -1983,7 +1983,7 @@ static void ggml_metal_encode_node(
|
|
| 1983 |
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
|
| 1984 |
|
| 1985 |
// TODO: add ggml_metal_kargs struct
|
| 1986 |
-
// TODO: optimize (see https://github.com/ggerganov/llama.cpp/pull/10238/commits/7941b6b9ec29a2866fec6fa6c51612515ca509f6)
|
| 1987 |
[encoder setComputePipelineState:pipeline];
|
| 1988 |
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
| 1989 |
if (id_src1) {
|
|
|
|
| 1983 |
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
|
| 1984 |
|
| 1985 |
// TODO: add ggml_metal_kargs struct
|
| 1986 |
+
// TODO: optimize (see https://github.com/ggml-org/llama.cpp/pull/10238/commits/7941b6b9ec29a2866fec6fa6c51612515ca509f6)
|
| 1987 |
[encoder setComputePipelineState:pipeline];
|
| 1988 |
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
| 1989 |
if (id_src1) {
|
ggml/src/ggml-metal/ggml-metal.metal
CHANGED
|
@@ -1058,7 +1058,7 @@ kernel void kernel_soft_max(
|
|
| 1058 |
}
|
| 1059 |
|
| 1060 |
// This barrier fixes a failing test
|
| 1061 |
-
// ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
|
| 1062 |
threadgroup_barrier(mem_flags::mem_none);
|
| 1063 |
|
| 1064 |
float sum = simd_sum(lsum);
|
|
@@ -1163,7 +1163,7 @@ kernel void kernel_soft_max_4(
|
|
| 1163 |
const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3];
|
| 1164 |
|
| 1165 |
// This barrier fixes a failing test
|
| 1166 |
-
// ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
|
| 1167 |
threadgroup_barrier(mem_flags::mem_none);
|
| 1168 |
|
| 1169 |
float sum = simd_sum(lsum);
|
|
|
|
| 1058 |
}
|
| 1059 |
|
| 1060 |
// This barrier fixes a failing test
|
| 1061 |
+
// ref: https://github.com/ggml-org/ggml/pull/621#discussion_r1425156335
|
| 1062 |
threadgroup_barrier(mem_flags::mem_none);
|
| 1063 |
|
| 1064 |
float sum = simd_sum(lsum);
|
|
|
|
| 1163 |
const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3];
|
| 1164 |
|
| 1165 |
// This barrier fixes a failing test
|
| 1166 |
+
// ref: https://github.com/ggml-org/ggml/pull/621#discussion_r1425156335
|
| 1167 |
threadgroup_barrier(mem_flags::mem_none);
|
| 1168 |
|
| 1169 |
float sum = simd_sum(lsum);
|