William Tambellini, slaren committed on
Commit d6b6852 · 1 Parent(s): d7d82b9

ggml : upgrade init_tensor API to return a ggml_status (llama/11854)


* Upgrade init_tensor API to return a ggml_status

To prepare for an 'abort-free' ggml
(ggml not aborting on OOM but returning an OOM status),
as agreed with Diego in the ggml repo,
upgrade the init_tensor() and view_init() APIs
to return a ggml_status.

* misc fixes

---------

Co-authored-by: slaren <[email protected]>
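
For callers of these APIs, the change means tensor-initialization helpers now report failure through their return value instead of returning void. Below is a minimal, illustrative sketch (not code from this commit) of how a caller might propagate the new status; the helper name init_one_tensor and its error handling are assumptions:

#include <stdbool.h>
#include <stdio.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

// Illustrative helper: allocate or view-initialize a single tensor and
// surface the new ggml_status instead of assuming success.
// 'talloc' is a ggml_tallocr created with ggml_tallocr_new() on a backend buffer.
static bool init_one_tensor(struct ggml_tallocr * talloc, struct ggml_tensor * t) {
    enum ggml_status status;
    if (t->view_src != NULL) {
        status = ggml_backend_view_init(t);      // views reuse the parent tensor's buffer
    } else {
        status = ggml_tallocr_alloc(talloc, t);  // plain tensors get their own range
    }
    if (status != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "failed to initialize tensor %s: %s\n", t->name, ggml_status_to_string(status));
        return false;
    }
    return true;
}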

ggml/include/ggml-alloc.h CHANGED
@@ -19,7 +19,7 @@ struct ggml_tallocr {
 };
 
 GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
-GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
+GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
 
 // Graph allocator
 /*
ggml/include/ggml-backend.h CHANGED
@@ -56,7 +56,7 @@ extern "C" {
 GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
 GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
 GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
-GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
 GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
 GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
 GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@@ -342,8 +342,8 @@ extern "C" {
 GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
 
 // Tensor initialization
-GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
-GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
+GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
 
 // CPU buffer types are always available
 GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
ggml/src/ggml-alloc.c CHANGED
@@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
     return talloc;
 }
 
-void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
+enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
     size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
     size = GGML_PAD(size, talloc->alignment);
 
@@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
 
     assert(((uintptr_t)addr % talloc->alignment) == 0);
 
-    ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
+    return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
 }
 
 // dynamic tensor allocator
@@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
 
 // utils
 
+static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
+    for (size_t i = 0; i < *n_buffers; i++) {
+        ggml_backend_buffer_free((*buffers)[i]);
+    }
+    free(*buffers);
+}
+
 static bool alloc_tensor_range(struct ggml_context * ctx,
                                struct ggml_tensor * first, struct ggml_tensor * last,
                                ggml_backend_buffer_type_t buft, size_t size,
                                ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
+
     ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
     if (buffer == NULL) {
-#ifndef NDEBUG
-        GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
-#endif
-        for (size_t i = 0; i < *n_buffers; i++) {
-            ggml_backend_buffer_free((*buffers)[i]);
-        }
-        free(*buffers);
+        GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
+        free_buffers(buffers, n_buffers);
        return false;
     }
 
+    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
+    (*buffers)[(*n_buffers)++] = buffer;
+
     struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
 
     for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
+        enum ggml_status status = GGML_STATUS_SUCCESS;
         if (t->data == NULL) {
            if (t->view_src == NULL) {
-                ggml_tallocr_alloc(&tallocr, t);
+                status = ggml_tallocr_alloc(&tallocr, t);
            } else if (t->buffer == NULL) {
-                ggml_backend_view_init(t);
+                status = ggml_backend_view_init(t);
            }
         } else {
            if (t->view_src != NULL && t->buffer == NULL) {
                // view of a pre-allocated tensor
-                ggml_backend_view_init(t);
+                status = ggml_backend_view_init(t);
            }
         }
+        if (status != GGML_STATUS_SUCCESS) {
+            GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
+            free_buffers(buffers, n_buffers);
+            return false;
+        }
     }
 
-    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
-    (*buffers)[(*n_buffers)++] = buffer;
-
     return true;
 }
 
ggml/src/ggml-backend-impl.h CHANGED
@@ -44,7 +44,7 @@ extern "C" {
     // base address of the buffer
     void * (*get_base) (ggml_backend_buffer_t buffer);
     // (optional) initialize a tensor in the buffer (eg. add tensor extras)
-    void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
     // tensor data access
     void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
     void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
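
For backend implementers, the init_tensor member of the buffer interface now returns enum ggml_status. The following is a hedged sketch of how an out-of-tree backend could adapt its callback under the new contract; the backend name my_backend and its per-tensor extra state are hypothetical, not part of this commit:

#include <stdlib.h>

#include "ggml.h"
#include "ggml-backend-impl.h"

// Hypothetical out-of-tree backend: init_tensor now returns a status so that
// recoverable failures (e.g. OOM) can be reported instead of aborting.
static enum ggml_status my_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    if (tensor->view_src != NULL) {
        // views share the parent's storage; nothing extra to set up
        return GGML_STATUS_SUCCESS;
    }
    void * extra = malloc(64);            // stand-in for per-tensor backend state
    if (extra == NULL) {
        return GGML_STATUS_ALLOC_FAILED;  // propagate OOM to the caller
    }
    tensor->extra = extra;
    GGML_UNUSED(buffer);
    return GGML_STATUS_SUCCESS;
}

In such a backend, the matching free_buffer callback would remain responsible for releasing whatever state was attached to tensor->extra.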
ggml/src/ggml-backend.cpp CHANGED
@@ -126,11 +126,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
     return base;
 }
 
-void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     // init_tensor is optional
     if (buffer->iface.init_tensor) {
-        buffer->iface.init_tensor(buffer, tensor);
+        return buffer->iface.init_tensor(buffer, tensor);
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
@@ -1641,7 +1642,7 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
 
 // utils
 
-void ggml_backend_view_init(struct ggml_tensor * tensor) {
+enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
     GGML_ASSERT(tensor->buffer == NULL);
     GGML_ASSERT(tensor->view_src != NULL);
     GGML_ASSERT(tensor->view_src->buffer != NULL);
@@ -1649,10 +1650,10 @@ void ggml_backend_view_init(struct ggml_tensor * tensor) {
 
     tensor->buffer = tensor->view_src->buffer;
     tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
-    ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
+    return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
 }
 
-void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
+enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
     GGML_ASSERT(tensor->buffer == NULL);
     GGML_ASSERT(tensor->data == NULL);
     GGML_ASSERT(tensor->view_src == NULL);
@@ -1662,7 +1663,7 @@ void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor
 
     tensor->buffer = buffer;
     tensor->data = addr;
-    ggml_backend_buffer_init_tensor(buffer, tensor);
+    return ggml_backend_buffer_init_tensor(buffer, tensor);
 }
 
 static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
@@ -1708,7 +1709,8 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
     struct ggml_tensor * dst = node_copies[id];
     if (dst->view_src != NULL) {
         graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
-        ggml_backend_view_init(dst);
+        enum ggml_status status = ggml_backend_view_init(dst);
+        GGML_ASSERT(status == GGML_STATUS_SUCCESS);
     }
     else {
         ggml_backend_tensor_copy(src, dst);
@@ -1823,7 +1825,6 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
     assert(g1->n_nodes == g2->n_nodes);
 
     for (int i = 0; i < g1->n_nodes; i++) {
-        //printf("eval %d/%d\n", i, g1->n_nodes);
         struct ggml_tensor * t1 = g1->nodes[i];
         struct ggml_tensor * t2 = g2->nodes[i];
 
ggml/src/ggml-cann/ggml-cann.cpp CHANGED
@@ -796,11 +796,11 @@ static bool need_transform(ggml_type type) {
  * @param buffer The CANN buffer from which to initialize the tensor.
  * @param tensor Pointer to the tensor to be initialized.
  */
-static void ggml_backend_cann_buffer_init_tensor(
+static enum ggml_status ggml_backend_cann_buffer_init_tensor(
     ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
     if (tensor->view_src != NULL && tensor->view_offs == 0) {
         GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS;
     }
 
     // TODO: can backend doesn't support quantized yet. Just leave the code
@@ -817,6 +817,7 @@ static void ggml_backend_cann_buffer_init_tensor(
                             memset_size, 0, memset_size));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 // TODO: need handle tensor which has paddings.
ggml/src/ggml-cpu/amx/amx.cpp CHANGED
@@ -50,10 +50,11 @@ static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
     return (void *) (buffer->context);
 }
 
-static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);
 
     GGML_UNUSED(buffer);
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp CHANGED
@@ -4135,10 +4135,11 @@ static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(con
     return nullptr;
 }
 
-static void ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     tensor->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_aarch64_get_optimal_repack_type(tensor));
 
     GGML_UNUSED(buffer);
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cpu_aarch64_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
ggml/src/ggml-cuda/ggml-cuda.cu CHANGED
@@ -540,12 +540,12 @@ static void * ggml_backend_cuda_buffer_get_base(ggml_backend_buffer_t buffer) {
     return ctx->dev_ptr;
 }
 
-static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
 
     if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS;
     }
 
     if (ggml_is_quantized(tensor->type) && tensor->view_src == nullptr && ggml_backend_buffer_get_usage(buffer) != GGML_BACKEND_BUFFER_USAGE_COMPUTE) {
@@ -558,6 +558,7 @@ static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, g
             CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cuda_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
@@ -792,7 +793,7 @@ static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buff
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
 
     ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
@@ -838,6 +839,7 @@ static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buf
         }
     }
     tensor->extra = extra;
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
ggml/src/ggml-opencl/ggml-opencl.cpp CHANGED
@@ -1211,7 +1211,7 @@ static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer)
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
 
     ggml_cl2_init(buffer->buft->device);
@@ -1251,6 +1251,7 @@ static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer,
             tensor->extra = extra;
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 // The optimized gemm and gemv kernels are used for large matrices without batch.
ggml/src/ggml-rpc/ggml-rpc.cpp CHANGED
@@ -464,7 +464,7 @@ static rpc_tensor serialize_tensor(const ggml_tensor * tensor) {
     return result;
 }
 
-static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
 
     // CUDA backend on the server pads everything to 512 due to CUDA limitations.
@@ -478,6 +478,7 @@ static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, gg
         bool status = send_rpc_cmd(ctx->sock, RPC_CMD_INIT_TENSOR, &request, sizeof(request), nullptr, 0);
         GGML_ASSERT(status);
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED
@@ -323,14 +323,14 @@ static void * ggml_backend_sycl_buffer_get_base(ggml_backend_buffer_t buffer) {
     return ctx->dev_ptr;
 }
 
-static void
+static enum ggml_status
 ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                      ggml_tensor *tensor) try {
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
 
     if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS;
     }
 
     ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
@@ -348,6 +348,7 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                             padded_size - original_size).wait()));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 catch (sycl::exception const &exc) {
     std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -729,7 +730,7 @@ static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buff
     GGML_UNUSED(buffer);
 }
 
-static void
+static enum ggml_status
 ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                            ggml_tensor *tensor) try {
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
@@ -804,6 +805,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
         }
     }
     tensor->extra = extra;
+    return GGML_STATUS_SUCCESS;
 }
 catch (sycl::exception const &exc) {
     std::cerr << exc.what() << "Exception caught at file:" << __FILE__
ggml/src/ggml-vulkan/ggml-vulkan.cpp CHANGED
@@ -7845,11 +7845,12 @@ static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
     UNUSED(buffer);
 }
 
-static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     VK_LOG_DEBUG("ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")");
     if (tensor->view_src != nullptr) {
         GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_vk_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {