Michael Podvitskiy, slaren, and ggerganov committed (unverified)
Commit 151c676 · 1 parent: d1b60e4

ggml : introduce ggml_status (ggml/750)

* using enum as an exit code instead of macros

* update return type from enum to unsigned int

* indentation fix

* compound update
  ggml_compute_exit_code -> ggml_status
  changed ggml_status from a bit-field type to simple codes
  ggml_status to string cast

* ggml_status to string cast

* GGML_CALL was removed

Co-authored-by: slaren <[email protected]>

---------

Co-authored-by: slaren <[email protected]>
Co-authored-by: Georgi Gerganov <[email protected]>

Files changed (11)
  1. ggml-backend-impl.h +4 -3
  2. ggml-backend.c +18 -21
  3. ggml-backend.h +16 -15
  4. ggml-cuda.cu +2 -2
  5. ggml-kompute.cpp +2 -2
  6. ggml-metal.m +4 -4
  7. ggml-opencl.cpp +2 -2
  8. ggml-sycl.cpp +2 -2
  9. ggml-vulkan.cpp +2 -2
  10. ggml.c +23 -6
  11. ggml.h +13 -4
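
Before the per-file diffs, here is a minimal usage sketch (not part of the commit; `backend` and `graph` are assumed to have been created elsewhere) showing how a caller is expected to consume the new enum ggml_status return value in place of the old bool:

    #include <stdio.h>
    #include "ggml.h"
    #include "ggml-backend.h"

    // illustrative helper, not from this commit: run a graph and report any failure
    static void compute_graph(ggml_backend_t backend, struct ggml_cgraph * graph) {
        enum ggml_status status = ggml_backend_graph_compute(backend, graph);
        if (status != GGML_STATUS_SUCCESS) {
            // ggml_status_to_string() is added by this commit (see ggml.c / ggml.h below)
            fprintf(stderr, "graph compute failed: %s\n", ggml_status_to_string(status));
        }
    }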
ggml-backend-impl.h CHANGED

@@ -91,13 +91,14 @@ extern "C" {
         // (optional) complete all pending operations
         void (*GGML_CALL synchronize)(ggml_backend_t backend);
 
-        // compute graph with a plan
+        // create a plan for ggml_cgraph and free it
         ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
         void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-        void (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
+        // compute graph with a plan
+        enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
         // compute graph without a plan (async)
-        bool (*GGML_CALL graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
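
Third-party backends implementing this interface now signal success or failure through the same enum. A hedged sketch (names are illustrative, not from this commit) of a trivial graph_compute callback under the new signature:

    // illustrative only: a skeleton backend graph_compute under the new signature
    static enum ggml_status my_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
        for (int i = 0; i < cgraph->n_nodes; i++) {
            // dispatch cgraph->nodes[i] to the device here;
            // on an unrecoverable error: return GGML_STATUS_FAILED;
        }
        return GGML_STATUS_SUCCESS;

        GGML_UNUSED(backend);
    }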
ggml-backend.c CHANGED

@@ -262,11 +262,11 @@ void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_pla
     backend->iface.graph_plan_free(backend, plan);
 }
 
-void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
-    backend->iface.graph_plan_compute(backend, plan);
+enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+    return backend->iface.graph_plan_compute(backend, plan);
 }
 
-bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     return backend->iface.graph_compute(backend, cgraph);
 }
 
@@ -732,15 +732,15 @@ GGML_CALL static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, g
     GGML_UNUSED(backend);
 }
 
-GGML_CALL static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+GGML_CALL static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
 
-    ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
+    return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
 
     GGML_UNUSED(backend);
 }
 
-GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_CALL static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -755,8 +755,7 @@ GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, str
     cplan.abort_callback = cpu_ctx->abort_callback;
     cplan.abort_callback_data = cpu_ctx->abort_callback_data;
 
-    ggml_graph_compute(cgraph, &cplan);
-    return true;
+    return ggml_graph_compute(cgraph, &cplan);
 }
 
 GGML_CALL static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -1437,7 +1436,7 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
     return true;
 }
 
-static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
+static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
     uint64_t copy_us[GGML_MAX_BACKENDS] = {0};
     uint64_t compute_us[GGML_MAX_BACKENDS] = {0};
 
@@ -1472,8 +1471,9 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
 
         uint64_t compute_start_us = ggml_time_us();
         if (!sched->callback_eval) {
-            if (!ggml_backend_graph_compute(split_backend, &split->graph)) {
-                return false;
+            enum ggml_status ec = ggml_backend_graph_compute(split_backend, &split->graph);
+            if (ec != GGML_STATUS_SUCCESS) {
+                return ec;
             }
             //ggml_backend_synchronize(split_backend); // necessary to measure compute time
         } else {
@@ -1494,8 +1494,9 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
 
                 struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1);
 
-                if (!ggml_backend_graph_compute(split_backend, &gv)) {
-                    return false;
+                enum ggml_status ec = ggml_backend_graph_compute(split_backend, &gv);
+                if (ec != GGML_STATUS_SUCCESS) {
+                    return ec;
                 }
 
                 if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) {
@@ -1519,7 +1520,7 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
     }
 #endif
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size) {
@@ -1581,7 +1582,7 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
     return true;
 }
 
-bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
+enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
     GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
 
     if (!sched->is_reset) {
@@ -1590,14 +1591,10 @@ bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cg
 
     ggml_backend_sched_split_graph(sched, graph);
    if (!ggml_backend_sched_alloc_splits(sched)) {
-        return false;
+        return GGML_STATUS_ALLOC_FAILED;
    }
 
-    if (!ggml_backend_sched_compute_splits(sched)) {
-        return false;
-    }
-
-    return true;
+    return ggml_backend_sched_compute_splits(sched);
 }
 
 void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
ggml-backend.h CHANGED

@@ -66,12 +66,13 @@ extern "C" {
 
     GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
 
-    GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API void                      ggml_backend_graph_plan_free  (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
-    GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API bool ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
-    GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
+    GGML_API enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+    GGML_API enum ggml_status ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+    GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends
     GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
@@ -157,26 +158,26 @@ extern "C" {
     typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
 
     // Initialize a backend scheduler
-    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
-    GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
+    GGML_API ggml_backend_sched_t  ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
+    GGML_API void                  ggml_backend_sched_free(ggml_backend_sched_t sched);
     // Initialize backend buffers from a measure graph
-    GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+    GGML_API bool                  ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
     // Get the number of splits of the last graph
-    GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
+    GGML_API int                   ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
 
-    GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
+    GGML_API size_t                ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
 
-    GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
-    GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
+    GGML_API void                  ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+    GGML_API ggml_backend_t        ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
 
     // Allocate and compute graph on the backend scheduler
-    GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+    GGML_API enum ggml_status      ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
 
     // Reset all assignments and allocators - must be called before changing the node backends
-    GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
+    GGML_API void                  ggml_backend_sched_reset(ggml_backend_sched_t sched);
 
     // Set a callback to be called for each resulting node during graph compute
-    GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
+    GGML_API void                  ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
 
     //
     // Utils
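
Callers of the scheduler see the same change: ggml_backend_sched_graph_compute() now returns enum ggml_status rather than bool, and distinguishes allocation failures from compute failures. A minimal sketch, assuming `sched` and `graph` already exist (the helper name is illustrative):

    // illustrative only: interpreting the scheduler's new return value
    static bool run_scheduled_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
        enum ggml_status st = ggml_backend_sched_graph_compute(sched, graph);
        if (st == GGML_STATUS_ALLOC_FAILED) {
            // splits could not be allocated; nothing was computed
            return false;
        }
        return st == GGML_STATUS_SUCCESS;
    }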
ggml-cuda.cu CHANGED

@@ -12241,7 +12241,7 @@ GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
     UNUSED(backend);
 }
 
-GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
 
     ggml_cuda_set_main_device(cuda_ctx->device);
@@ -12277,7 +12277,7 @@ GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, gg
         GGML_ASSERT(ok);
     }
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
ggml-kompute.cpp CHANGED

@@ -1927,10 +1927,10 @@ static ggml_backend_buffer_type_t ggml_backend_kompute_get_default_buffer_type(g
     return ggml_backend_kompute_buffer_type(ctx->device);
 }
 
-static bool ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     auto * ctx = static_cast<ggml_kompute_context *>(backend->context);
     ggml_vk_graph_compute(ctx, cgraph);
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
ggml-metal.m CHANGED

@@ -748,7 +748,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
     }
 }
 
-static bool ggml_metal_graph_compute(
+static enum ggml_status ggml_metal_graph_compute(
         struct ggml_metal_context * ctx,
         struct ggml_cgraph * gf) {
 
@@ -2484,7 +2484,7 @@ static bool ggml_metal_graph_compute(
         MTLCommandBufferStatus status = [command_buffer status];
         if (status != MTLCommandBufferStatusCompleted) {
             GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
-            return false;
+            return GGML_STATUS_FAILED;
         }
     }
 
@@ -2493,7 +2493,7 @@ static bool ggml_metal_graph_compute(
         }
 
     }
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -2795,7 +2795,7 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffe
     UNUSED(backend);
 }
 
-GGML_CALL static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_CALL static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
 
     return ggml_metal_graph_compute(metal_ctx, cgraph);
ggml-opencl.cpp CHANGED

@@ -2231,7 +2231,7 @@ static ggml_backend_buffer_type_t ggml_backend_opencl_get_default_buffer_type(gg
     GGML_UNUSED(backend);
 }
 
-static bool ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
+static ggml_status ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
     for (int i = 0; i < graph->n_nodes; ++i) {
         ggml_tensor * node = graph->nodes[i];
         switch (node->op) {
@@ -2246,7 +2246,7 @@ static bool ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgrap
         }
     }
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 
     GGML_UNUSED(backend);
 }
ggml-sycl.cpp CHANGED

@@ -15581,7 +15581,7 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-GGML_CALL static bool ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     ggml_sycl_set_main_device(sycl_ctx->device);
 
@@ -15613,7 +15613,7 @@ GGML_CALL static bool ggml_backend_sycl_graph_compute(ggml_backend_t backend, gg
         GGML_ASSERT(ok);
     }
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
ggml-vulkan.cpp CHANGED

@@ -5092,7 +5092,7 @@ GGML_CALL static void ggml_backend_vk_synchronize(ggml_backend_t backend) {
     ctx->transfer_ctx = nullptr;
 }
 
-GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
@@ -5135,7 +5135,7 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml
 
     ggml_vk_graph_cleanup(ctx);
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 
     UNUSED(backend);
 }
ggml.c CHANGED

@@ -320,6 +320,16 @@ static ggml_fp16_t ggml_table_exp_f16[1 << 16];
 // precomputed f32 table for f16 (256 KB) (ggml-impl.h)
 float ggml_table_f32_f16[1 << 16];
 
+const char * ggml_status_to_string(enum ggml_status status) {
+    switch (status) {
+        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
+        case GGML_STATUS_FAILED: return "GGML status: error (operation failed)";
+        case GGML_STATUS_SUCCESS: return "GGML status: success";
+        case GGML_STATUS_ABORTED: return "GGML status: warning (operation aborted)";
+        default: GGML_ASSERT(false);
+    }
+}
+
 // note: do not use these inside ggml.c
 // these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
@@ -17400,6 +17410,7 @@ struct ggml_compute_state {
     ggml_thread_t thrd;
     int ith;
     struct ggml_compute_state_shared * shared;
+    enum ggml_status ec;
 };
 
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
@@ -17693,7 +17704,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     while (true) {
         if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
             state->shared->node_n += 1;
-            return (thread_ret_t) GGML_EXIT_ABORTED;
+            state->ec = GGML_STATUS_ABORTED;
+            return 0;
         }
 
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
@@ -17815,7 +17827,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }
     }
 
-    return GGML_EXIT_SUCCESS;
+    return 0;
 }
 
 struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threads) {
@@ -18011,7 +18023,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     return cplan;
 }
 
-int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
+enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     {
         GGML_ASSERT(cplan);
         GGML_ASSERT(cplan->n_threads > 0);
@@ -18055,6 +18067,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
                 .thrd = 0,
                 .ith = j,
                 .shared = &state_shared,
+                .ec = GGML_STATUS_SUCCESS,
             };
 
             const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
@@ -18065,12 +18078,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
 
     workers[0].ith = 0;
     workers[0].shared = &state_shared;
+    workers[0].ec = GGML_STATUS_SUCCESS;
 
     const int64_t perf_start_cycles = ggml_perf_cycles();
     const int64_t perf_start_time_us = ggml_perf_time_us();
 
     // this is a work thread too
-    int compute_status = (size_t) ggml_graph_compute_thread(&workers[0]);
+    ggml_graph_compute_thread(&workers[0]);
+    enum ggml_status compute_status = workers[0].ec;
 
     // don't leave affinity set on the main thread
     clear_numa_thread_affinity();
@@ -18080,6 +18095,8 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         for (int j = 1; j < n_threads; j++) {
             const int rc = ggml_thread_join(workers[j].thrd, NULL);
             GGML_ASSERT(rc == 0);
+            if (workers[j].ec != GGML_STATUS_SUCCESS)
+                compute_status = workers[j].ec;
         }
     }
 
@@ -18107,14 +18124,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     return compute_status;
 }
 
-void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
+enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
 
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
 
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
-    ggml_graph_compute(cgraph, &cplan);
+    return ggml_graph_compute(cgraph, &cplan);
 }
 
 struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
ggml.h CHANGED

@@ -315,6 +315,16 @@
 extern "C" {
 #endif
 
+    enum ggml_status {
+        GGML_STATUS_ALLOC_FAILED = -2,
+        GGML_STATUS_FAILED = -1,
+        GGML_STATUS_SUCCESS = 0,
+        GGML_STATUS_ABORTED = 1,
+    };
+
+    // get ggml_status name string
+    GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
+
     typedef uint16_t ggml_fp16_t;
 
     // convert FP16 <-> FP32
@@ -1940,12 +1950,11 @@ extern "C" {
 
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
-    GGML_API int ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
-
+    GGML_API struct ggml_cplan ggml_graph_plan        (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
+    GGML_API enum ggml_status  ggml_graph_compute     (      struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
     // same as ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
-    GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
+    GGML_API enum ggml_status  ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
 
     GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);