William Tambellini (with slaren) committed
Commit: d6b6852 · Parent(s): d7d82b9
ggml : upgrade init_tensor API to return a ggml_status (llama/11854)

* Upgrade init_tensor API to return a ggml_status

  To prepare for an 'abort-free' ggml
  (ggml does not abort on OOM but returns an OOM status),
  as agreed with Diego in the ggml repo,
  upgrade the init_tensor() and view_init() APIs
  to return a ggml_status.

* misc fixes

---------

Co-authored-by: slaren <[email protected]>
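
For callers of these entry points the change is purely in the return type: helpers that used to return void now report a ggml_status. A minimal caller-side sketch (the error handling here is illustrative and not part of this commit; it assumes only the public headers changed below):

// Sketch: checking the status now returned by ggml_backend_view_init().
#include <stdbool.h>
#include <stdio.h>
#include "ggml.h"
#include "ggml-backend.h"

static bool init_view_checked(struct ggml_tensor * t) {
    // previously ggml_backend_view_init() returned void and could only abort on failure
    enum ggml_status status = ggml_backend_view_init(t);
    if (status != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "view init failed: %s\n", ggml_status_to_string(status));
        return false;
    }
    return true;
}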
- ggml/include/ggml-alloc.h +1 -1
- ggml/include/ggml-backend.h +3 -3
- ggml/src/ggml-alloc.c +24 -15
- ggml/src/ggml-backend-impl.h +1 -1
- ggml/src/ggml-backend.cpp +9 -8
- ggml/src/ggml-cann/ggml-cann.cpp +3 -2
- ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- ggml/src/ggml-cuda/ggml-cuda.cu +5 -3
- ggml/src/ggml-opencl/ggml-opencl.cpp +2 -1
- ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- ggml/src/ggml-sycl/ggml-sycl.cpp +5 -3
- ggml/src/ggml-vulkan/ggml-vulkan.cpp +2 -1
ggml/include/ggml-alloc.h CHANGED

@@ -19,7 +19,7 @@ struct ggml_tallocr {
 };
 
 GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
-GGML_API void             ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
+GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
 
 // Graph allocator
 /*
ggml/include/ggml-backend.h CHANGED

@@ -56,7 +56,7 @@ extern "C" {
 GGML_API void   ggml_backend_buffer_free          (ggml_backend_buffer_t buffer);
 GGML_API void * ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer);
 GGML_API size_t ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer);
-GGML_API void             ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+GGML_API enum ggml_status ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
 GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
 GGML_API size_t ggml_backend_buffer_get_max_size  (ggml_backend_buffer_t buffer);
 GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);

@@ -342,8 +342,8 @@ extern "C" {
 GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
 
 // Tensor initialization
-GGML_API void             ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
-GGML_API void             ggml_backend_view_init(struct ggml_tensor * tensor);
+GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
 
 // CPU buffer types are always available
 GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
ggml/src/ggml-alloc.c CHANGED

@@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
     return talloc;
 }
 
-void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
+enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
     size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
     size = GGML_PAD(size, talloc->alignment);
 

@@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
 
     assert(((uintptr_t)addr % talloc->alignment) == 0);
 
-    ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
+    return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
 }
 
 // dynamic tensor allocator

@@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
 
 // utils
 
+static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
+    for (size_t i = 0; i < *n_buffers; i++) {
+        ggml_backend_buffer_free((*buffers)[i]);
+    }
+    free(*buffers);
+}
+
 static bool alloc_tensor_range(struct ggml_context * ctx,
         struct ggml_tensor * first, struct ggml_tensor * last,
         ggml_backend_buffer_type_t buft, size_t size,
         ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
+
     ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
     if (buffer == NULL) {
-#ifndef NDEBUG
-        GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
-#endif
-        for (size_t i = 0; i < *n_buffers; i++) {
-            ggml_backend_buffer_free((*buffers)[i]);
-        }
-        free(*buffers);
+        GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
+        free_buffers(buffers, n_buffers);
         return false;
     }
 
+    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
+    (*buffers)[(*n_buffers)++] = buffer;
+
     struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
 
     for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
+        enum ggml_status status = GGML_STATUS_SUCCESS;
         if (t->data == NULL) {
             if (t->view_src == NULL) {
-                ggml_tallocr_alloc(&tallocr, t);
+                status = ggml_tallocr_alloc(&tallocr, t);
             } else if (t->buffer == NULL) {
-                ggml_backend_view_init(t);
+                status = ggml_backend_view_init(t);
             }
         } else {
             if (t->view_src != NULL && t->buffer == NULL) {
                 // view of a pre-allocated tensor
-                ggml_backend_view_init(t);
+                status = ggml_backend_view_init(t);
             }
         }
+        if (status != GGML_STATUS_SUCCESS) {
+            GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
+            free_buffers(buffers, n_buffers);
+            return false;
+        }
     }
 
-    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
-    (*buffers)[(*n_buffers)++] = buffer;
-
     return true;
 }
 
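
Downstream code that drives a ggml_tallocr by hand can propagate the new status the same way alloc_tensor_range does above. A hedged sketch, assuming a buffer and tensor created elsewhere (the helper name alloc_one is hypothetical):

// Sketch: manual allocation with the updated ggml_tallocr API (illustrative).
#include <stdio.h>
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

static enum ggml_status alloc_one(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    struct ggml_tallocr talloc = ggml_tallocr_new(buffer);
    enum ggml_status status = ggml_tallocr_alloc(&talloc, tensor);
    if (status != GGML_STATUS_SUCCESS) {
        // previously this failure path could only abort inside ggml
        fprintf(stderr, "failed to place tensor %s: %s\n", tensor->name, ggml_status_to_string(status));
    }
    return status;
}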
ggml/src/ggml-backend-impl.h CHANGED

@@ -44,7 +44,7 @@ extern "C" {
         // base address of the buffer
         void *           (*get_base)     (ggml_backend_buffer_t buffer);
         // (optional) initialize a tensor in the buffer (eg. add tensor extras)
-        void             (*init_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+        enum ggml_status (*init_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
         // tensor data access
         void             (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
         void             (*set_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
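
On the implementation side, every backend's buffer interface has to match the new callback signature, as the per-backend diffs below show. A hedged sketch of what an out-of-tree backend's callback might look like after this change (my_backend_buffer_init_tensor and its body are hypothetical, not part of this commit):

// Hypothetical out-of-tree callback matching the new init_tensor signature.
#include "ggml.h"
#include "ggml-backend.h"

static enum ggml_status my_backend_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                                      struct ggml_tensor * tensor) {
    if (tensor->view_src != NULL) {
        // views reuse the storage of view_src; nothing extra to set up here
        return GGML_STATUS_SUCCESS;
    }
    // ... backend-specific setup (tensor extras, padding, clearing) ...
    // a failed device allocation can now be reported instead of aborting:
    // return GGML_STATUS_ALLOC_FAILED;
    GGML_UNUSED(buffer);
    return GGML_STATUS_SUCCESS;
}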
ggml/src/ggml-backend.cpp CHANGED

@@ -126,11 +126,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
     return base;
 }
 
-void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     // init_tensor is optional
     if (buffer->iface.init_tensor) {
-        buffer->iface.init_tensor(buffer, tensor);
+        return buffer->iface.init_tensor(buffer, tensor);
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {

@@ -1641,7 +1642,7 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
 
 // utils
 
-void ggml_backend_view_init(struct ggml_tensor * tensor) {
+enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
     GGML_ASSERT(tensor->buffer == NULL);
     GGML_ASSERT(tensor->view_src != NULL);
     GGML_ASSERT(tensor->view_src->buffer != NULL);

@@ -1649,10 +1650,10 @@ void ggml_backend_view_init(struct ggml_tensor * tensor) {
 
     tensor->buffer = tensor->view_src->buffer;
     tensor->data   = (char *)tensor->view_src->data + tensor->view_offs;
-    ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
+    return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
 }
 
-void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
+enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
     GGML_ASSERT(tensor->buffer == NULL);
     GGML_ASSERT(tensor->data == NULL);
     GGML_ASSERT(tensor->view_src == NULL);

@@ -1662,7 +1663,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor
 
     tensor->buffer = buffer;
     tensor->data = addr;
-    ggml_backend_buffer_init_tensor(buffer, tensor);
+    return ggml_backend_buffer_init_tensor(buffer, tensor);
 }
 
 static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,

@@ -1708,7 +1709,8 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
     struct ggml_tensor * dst = node_copies[id];
     if (dst->view_src != NULL) {
         graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
-        ggml_backend_view_init(dst);
+        enum ggml_status status = ggml_backend_view_init(dst);
+        GGML_ASSERT(status == GGML_STATUS_SUCCESS);
     }
     else {
         ggml_backend_tensor_copy(src, dst);

@@ -1823,7 +1825,6 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
     assert(g1->n_nodes == g2->n_nodes);
 
     for (int i = 0; i < g1->n_nodes; i++) {
-        //printf("eval %d/%d\n", i, g1->n_nodes);
         struct ggml_tensor * t1 = g1->nodes[i];
         struct ggml_tensor * t2 = g2->nodes[i];
 
ggml/src/ggml-cann/ggml-cann.cpp CHANGED

@@ -796,11 +796,11 @@ static bool need_transform(ggml_type type) {
  * @param buffer The CANN buffer from which to initialize the tensor.
  * @param tensor Pointer to the tensor to be initialized.
  */
-static void ggml_backend_cann_buffer_init_tensor(
+static enum ggml_status ggml_backend_cann_buffer_init_tensor(
     ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
     if (tensor->view_src != NULL && tensor->view_offs == 0) {
         GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS;
     }
 
     // TODO: can backend doesn't support quantized yet. Just leave the code

@@ -817,6 +817,7 @@ static void ggml_backend_cann_buffer_init_tensor(
                               memset_size, 0, memset_size));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 // TODO: need handle tensor which has paddings.
ggml/src/ggml-cpu/amx/amx.cpp CHANGED

@@ -50,10 +50,11 @@ static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
     return (void *) (buffer->context);
 }
 
-static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);
 
     GGML_UNUSED(buffer);
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp CHANGED

@@ -4135,10 +4135,11 @@ static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(con
     return nullptr;
 }
 
-static void ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     tensor->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_aarch64_get_optimal_repack_type(tensor));
 
     GGML_UNUSED(buffer);
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cpu_aarch64_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
ggml/src/ggml-cuda/ggml-cuda.cu CHANGED

@@ -540,12 +540,12 @@ static void * ggml_backend_cuda_buffer_get_base(ggml_backend_buffer_t buffer) {
     return ctx->dev_ptr;
 }
 
-static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
 
     if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS;
     }
 
     if (ggml_is_quantized(tensor->type) && tensor->view_src == nullptr && ggml_backend_buffer_get_usage(buffer) != GGML_BACKEND_BUFFER_USAGE_COMPUTE) {

@@ -558,6 +558,7 @@ static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, g
             CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cuda_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {

@@ -792,7 +793,7 @@ static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buff
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
 
     ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;

@@ -838,6 +839,7 @@ static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buf
         }
     }
     tensor->extra = extra;
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
ggml/src/ggml-opencl/ggml-opencl.cpp CHANGED

@@ -1211,7 +1211,7 @@ static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer)
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
 
     ggml_cl2_init(buffer->buft->device);

@@ -1251,6 +1251,7 @@ static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer,
             tensor->extra = extra;
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 // The optimized gemm and gemv kernels are used for large matrices without batch.
ggml/src/ggml-rpc/ggml-rpc.cpp CHANGED

@@ -464,7 +464,7 @@ static rpc_tensor serialize_tensor(const ggml_tensor * tensor) {
     return result;
 }
 
-static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
 
     // CUDA backend on the server pads everything to 512 due to CUDA limitations.

@@ -478,6 +478,7 @@ static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, gg
         bool status = send_rpc_cmd(ctx->sock, RPC_CMD_INIT_TENSOR, &request, sizeof(request), nullptr, 0);
         GGML_ASSERT(status);
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED

@@ -323,14 +323,14 @@ static void * ggml_backend_sycl_buffer_get_base(ggml_backend_buffer_t buffer) {
     return ctx->dev_ptr;
 }
 
-static void
+static enum ggml_status
 ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                      ggml_tensor *tensor) try {
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
 
     if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS;
     }
 
     ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};

@@ -348,6 +348,7 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                 padded_size - original_size).wait()));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 catch (sycl::exception const &exc) {
   std::cerr << exc.what() << "Exception caught at file:" << __FILE__

@@ -729,7 +730,7 @@ static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buff
     GGML_UNUSED(buffer);
 }
 
-static void
+static enum ggml_status
 ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                            ggml_tensor *tensor) try {
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported

@@ -804,6 +805,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
         }
     }
     tensor->extra = extra;
+    return GGML_STATUS_SUCCESS;
 }
 catch (sycl::exception const &exc) {
   std::cerr << exc.what() << "Exception caught at file:" << __FILE__
ggml/src/ggml-vulkan/ggml-vulkan.cpp CHANGED

@@ -7845,11 +7845,12 @@ static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
     UNUSED(buffer);
 }
 
-static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     VK_LOG_DEBUG("ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")");
     if (tensor->view_src != nullptr) {
         GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_vk_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|