uvos committed
Commit 089afa0 · 1 parent: 7b7c5d3

hip : Add hipGraph and VMM support to ROCM (llama/11362)

ggml/CMakeLists.txt CHANGED
@@ -154,6 +154,7 @@ option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashA
 option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
 
 option(GGML_HIP "ggml: use HIP" OFF)
+option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
 option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
 option(GGML_VULKAN "ggml: use Vulkan" OFF)
 option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
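With the new option in place, a ROCm build with the experimental HIP graph path can be configured as below. This is a minimal sketch: GGML_HIP and GGML_HIP_GRAPHS come from this commit, everything else (build directory, generator, compiler selection) depends on the local ROCm installation. GGML_CUDA_NO_VMM (wired up in ggml-hip/CMakeLists.txt below) can likewise be set to opt out of the new VMM pool.

    cmake -B build -DGGML_HIP=ON -DGGML_HIP_GRAPHS=ON
    cmake --build build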
ggml/src/ggml-cuda/common.cuh CHANGED
@@ -588,7 +588,7 @@ struct ggml_tensor_extra_gpu {
 };
 
 
-#if (CUDART_VERSION >= 12000) && defined(GGML_CUDA_USE_GRAPHS)
+#if ((CUDART_VERSION >= 12000) && defined(GGML_CUDA_USE_GRAPHS)) || defined(GGML_HIP_GRAPHS)
 #define USE_CUDA_GRAPH
 #endif
 
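Because the HIP case bypasses the CUDART_VERSION check, a ROCm build with GGML_HIP_GRAPHS now compiles the same USE_CUDA_GRAPH-guarded code paths as a CUDA 12 build. A sketch of the resulting gate (the guarded bodies here are illustrative comments, not code from the tree):

    #if ((CUDART_VERSION >= 12000) && defined(GGML_CUDA_USE_GRAPHS)) || defined(GGML_HIP_GRAPHS)
    #define USE_CUDA_GRAPH
    #endif

    #ifdef USE_CUDA_GRAPH
    // graph path: capture the ops into a graph once, then replay it
    #else
    // fallback path: launch each kernel individually on the stream
    #endif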
ggml/src/ggml-cuda/ggml-cuda.cu CHANGED
@@ -62,7 +62,7 @@ static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 [[noreturn]]
 void ggml_cuda_error(const char * stmt, const char * func, const char * file, int line, const char * msg) {
     int id = -1; // in case cudaGetDevice fails
-    cudaGetDevice(&id);
+    (void)cudaGetDevice(&id);
 
     GGML_LOG_ERROR(GGML_CUDA_NAME " error: %s\n", msg);
     GGML_LOG_ERROR("  current device: %d, in function %s at %s:%d\n", id, func, file, line);
@@ -152,7 +152,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_CUDA_NO_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
@@ -164,7 +164,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
             alloc_prop.location.id = id;
             CU_CHECK(cuMemGetAllocationGranularity(&info.devices[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
         }
-#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_CUDA_NO_VMM)
         info.devices[id].vmm = !!device_vmm;
 
         cudaDeviceProp prop;
@@ -300,7 +300,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_CUDA_NO_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
 
@@ -309,6 +309,9 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     size_t pool_used = 0;
     size_t pool_size = 0;
     size_t granularity;
+#if defined(GGML_USE_HIP)
+    std::vector<std::pair<CUdeviceptr, size_t>> mappings;
+#endif
 
     explicit ggml_cuda_pool_vmm(int device) :
         device(device),
@@ -317,7 +320,14 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 
     ~ggml_cuda_pool_vmm() {
         if (pool_addr != 0) {
+#if defined(GGML_USE_HIP)
+            // Workaround for https://github.com/ROCm/ROCR-Runtime/issues/285
+            for (std::pair<CUdeviceptr, size_t> & mapping : mappings) {
+                CU_CHECK(cuMemUnmap(mapping.first, mapping.second));
+            }
+#else
             CU_CHECK(cuMemUnmap(pool_addr, pool_size));
+#endif
             CU_CHECK(cuMemAddressFree(pool_addr, CUDA_POOL_VMM_MAX_SIZE));
         }
     }
@@ -350,7 +360,11 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         }
 
         // map at the end of the pool
-        CU_CHECK(cuMemMap(pool_addr + pool_size, reserve_size, 0, handle, 0));
+        CUdeviceptr start_ptr = (CUdeviceptr)((char *)(pool_addr) + pool_size);
+        CU_CHECK(cuMemMap(start_ptr, reserve_size, 0, handle, 0));
+#if defined(GGML_USE_HIP)
+        mappings.push_back({start_ptr, reserve_size});
+#endif
 
         // the memory allocation handle is no longer needed after mapping
         CU_CHECK(cuMemRelease(handle));
@@ -360,7 +374,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
         access.location.id = device;
         access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
-        CU_CHECK(cuMemSetAccess(pool_addr + pool_size, reserve_size, &access, 1));
+        CU_CHECK(cuMemSetAccess((CUdeviceptr)((char *)(pool_addr) + pool_size), reserve_size, &access, 1));
 
         // add to the pool
         pool_size += reserve_size;
@@ -372,7 +386,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 
         GGML_ASSERT(pool_addr != 0);
 
-        void * ptr = (void *) (pool_addr + pool_used);
+        void * ptr = (void *) ((CUdeviceptr)((char *)(pool_addr) + pool_used));
         *actual_size = size;
         pool_used += size;
 
@@ -391,17 +405,17 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         pool_used -= size;
 
         // all deallocations must be in reverse order of the allocations
-        GGML_ASSERT(ptr == (void *) (pool_addr + pool_used));
+        GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
     }
 };
-#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_CUDA_NO_VMM)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_CUDA_NO_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }
-#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_CUDA_NO_VMM)
     return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
 }
 
@@ -547,7 +561,7 @@ static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffer(ggml_bac
     cudaError_t err = ggml_cuda_device_malloc(&dev_ptr, size, buft_ctx->device);
     if (err != cudaSuccess) {
         // clear the error
-        cudaGetLastError();
+        (void)cudaGetLastError();
         GGML_LOG_ERROR("%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size / 1024.0 / 1024.0, buft_ctx->device, cudaGetErrorString(err));
         return nullptr;
     }
@@ -962,7 +976,7 @@ static void * ggml_cuda_host_malloc(size_t size) {
    cudaError_t err = cudaMallocHost((void **) &ptr, size);
    if (err != cudaSuccess) {
        // clear the error
-       cudaGetLastError();
+       (void)cudaGetLastError();
        GGML_LOG_DEBUG("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
                       size / 1024.0 / 1024.0, cudaGetErrorString(err));
        return nullptr;
@@ -1209,7 +1223,7 @@ static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) {
                 CUDA_CHECK(err);
             } else {
                 // reset the error
-                cudaGetLastError();
+                (void)cudaGetLastError();
             }
         } else {
             cudaError_t err = cudaDeviceDisablePeerAccess(id_other);
@@ -1217,7 +1231,7 @@ static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) {
                 CUDA_CHECK(err);
             } else {
                 // reset the error
-                cudaGetLastError();
+                (void)cudaGetLastError();
             }
         }
     }
@@ -2452,7 +2466,7 @@ static void maintain_cuda_graph(ggml_backend_cuda_context * cuda_ctx, std::vecto
         if (stat == cudaErrorInvalidDeviceFunction) {
             // Fails due to incorrect handling by CUDA runtime of CUDA BLAS node.
             // We don't need to update blas nodes, so clear error and move on.
-            cudaGetLastError();
+            (void)cudaGetLastError();
         } else {
             GGML_ASSERT(stat == cudaSuccess);
         }
@@ -2507,14 +2521,20 @@ static bool is_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx,
 static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
 
     cudaGraphExecUpdateResultInfo result_info;
+#ifdef __HIP_PLATFORM_AMD__
+    hipGraphNode_t errorNode;
+    hipError_t stat = hipGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &errorNode, &result_info);
+#else
     cudaError_t stat = cudaGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &result_info);
+#endif
     if (stat == cudaErrorGraphExecUpdateFailure) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: CUDA graph update failed\n", __func__);
 #endif
+
         // The pre-existing graph exec cannot be updated due to violated constraints
         // so instead clear error and re-instantiate
-        cudaGetLastError();
+        (void)cudaGetLastError();
         CUDA_CHECK(cudaGraphExecDestroy(cuda_ctx->cuda_graph->instance));
         cuda_ctx->cuda_graph->instance = nullptr;
         CUDA_CHECK(cudaGraphInstantiate(&cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, NULL, NULL, 0));
@@ -2742,7 +2762,7 @@ bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size) {
     cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterPortable | cudaHostRegisterReadOnly);
     if (err != cudaSuccess) {
         // clear the error
-        cudaGetLastError();
+        (void)cudaGetLastError();
 
         GGML_LOG_DEBUG("%s: failed to register %.2f MiB of pinned memory: %s\n", __func__,
                        size / 1024.0 / 1024.0, cudaGetErrorString(err));
@@ -2762,7 +2782,7 @@ void ggml_backend_cuda_unregister_host_buffer(void * buffer) {
     cudaError_t err = cudaHostUnregister(buffer);
     if (err != cudaSuccess) {
         // clear the error
-        cudaGetLastError();
+        (void)cudaGetLastError();
     }
 }
 
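The VMM pool change above follows the usual driver-API virtual memory pattern: reserve one large virtual range up front, then map physical chunks into it on demand. On ROCm the only twist is the per-mapping bookkeeping, because ROCR-Runtime issue #285 prevents unmapping the whole reservation in a single cuMemUnmap call. Below is a stripped-down, self-contained sketch of that lifecycle using the HIP names directly (illustrative only, not the pool code itself; sizes are arbitrary):

    #include <hip/hip_runtime.h>
    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    #define HIP_CHECK(fn) do { hipError_t e = (fn); if (e != hipSuccess) { \
        fprintf(stderr, "HIP error: %s\n", hipGetErrorString(e)); abort(); } } while (0)

    int main() {
        int device = 0;
        HIP_CHECK(hipSetDevice(device));

        // describe a pinned device allocation, as the pool's alloc_prop does
        hipMemAllocationProp prop = {};
        prop.type          = hipMemAllocationTypePinned;
        prop.location.type = hipMemLocationTypeDevice;
        prop.location.id   = device;

        size_t granularity = 0;
        HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop,
                                                 hipMemAllocationGranularityRecommended));

        // reserve a virtual range once; physical memory is mapped in on demand
        const size_t reserve = 4 * granularity;
        void * pool_addr = nullptr;
        HIP_CHECK(hipMemAddressReserve(&pool_addr, reserve, 0, nullptr, 0));

        std::vector<std::pair<void *, size_t>> mappings; // per-chunk records for teardown

        // map one physical chunk at the start of the reservation
        hipMemGenericAllocationHandle_t handle;
        HIP_CHECK(hipMemCreate(&handle, granularity, &prop, 0));
        HIP_CHECK(hipMemMap(pool_addr, granularity, 0, handle, 0));
        HIP_CHECK(hipMemRelease(handle)); // handle no longer needed once mapped
        mappings.push_back({pool_addr, granularity});

        hipMemAccessDesc access = {};
        access.location.type = hipMemLocationTypeDevice;
        access.location.id   = device;
        access.flags         = hipMemAccessFlagsProtReadWrite;
        HIP_CHECK(hipMemSetAccess(pool_addr, granularity, &access, 1));

        // teardown: unmap each chunk individually (the ROCR-Runtime #285
        // workaround), then release the whole reservation
        for (auto & m : mappings) {
            HIP_CHECK(hipMemUnmap(m.first, m.second));
        }
        HIP_CHECK(hipMemAddressFree(pool_addr, reserve));
        return 0;
    }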
ggml/src/ggml-cuda/vendors/hip.h CHANGED
@@ -19,6 +19,12 @@
 #define CUBLAS_TF32_TENSOR_OP_MATH 0
 #define CUDA_R_16F HIPBLAS_R_16F
 #define CUDA_R_32F HIPBLAS_R_32F
+#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
+#define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
+#define CU_MEM_ALLOCATION_TYPE_PINNED hipMemAllocationTypePinned
+#define CU_MEM_LOCATION_TYPE_DEVICE hipMemLocationTypeDevice
+#define CU_MEM_ACCESS_FLAGS_PROT_READWRITE hipMemAccessFlagsProtReadWrite
+#define CU_CHECK(fn) {hipError_t err = fn; if(err != hipSuccess) { GGML_ABORT("HipVMM Failure: %s\n", hipGetErrorString(err)); }}
 #define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
 #define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
 #define cublasCreate hipblasCreate
@@ -74,6 +80,21 @@
 #define cudaMemGetInfo hipMemGetInfo
 #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
 #define cudaSetDevice hipSetDevice
+#define cuDeviceGet hipDeviceGet
+#define CUdevice hipDevice_t
+#define CUdeviceptr hipDeviceptr_t
+#define cuMemUnmap hipMemUnmap
+#define CUmemAccessDesc hipMemAccessDesc
+#define cuMemAddressFree hipMemAddressFree
+#define cuMemRelease hipMemRelease
+#define CUmemGenericAllocationHandle hipMemGenericAllocationHandle_t
+#define cuMemCreate hipMemCreate
+#define cuMemAddressReserve hipMemAddressReserve
+#define cuMemMap hipMemMap
+#define cuMemSetAccess hipMemSetAccess
+#define cuMemGetAllocationGranularity hipMemGetAllocationGranularity
+#define CUmemAllocationProp hipMemAllocationProp
+#define cuDeviceGetAttribute hipDeviceGetAttribute
 #define cudaStreamCreateWithFlags hipStreamCreateWithFlags
 #define cudaStreamDestroy hipStreamDestroy
 #define cudaStreamFireAndForget hipStreamFireAndForget
@@ -81,6 +102,28 @@
 #define cudaStreamPerThread hipStreamPerThread
 #define cudaStreamSynchronize hipStreamSynchronize
 #define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
+#define cudaGraphExec_t hipGraphExec_t
+#define cudaGraphNode_t hipGraphNode_t
+#define cudaKernelNodeParams hipKernelNodeParams
+#define cudaKernelNodeParams hipKernelNodeParams
+#define cudaGraphExecDestroy hipGraphExecDestroy
+#define cudaGraphLaunch hipGraphLaunch
+#define cudaErrorGraphExecUpdateFailure hipErrorGraphExecUpdateFailure
+#define cudaGraphExecUpdateResultInfo hipGraphExecUpdateResult
+#define cudaGraphNodeType hipGraphNodeType
+#define cudaGraphNodeTypeKernel hipGraphNodeTypeKernel
+#define cudaGraphInstantiate hipGraphInstantiate
+#define cudaStreamEndCapture hipStreamEndCapture
+#define cudaGraphDestroy hipGraphDestroy
+#define cudaGraphKernelNodeSetParams hipGraphKernelNodeSetParams
+#define cudaErrorInvalidDeviceFunction hipErrorInvalidDeviceFunction
+#define cudaGraphKernelNodeGetParams hipGraphKernelNodeGetParams
+#define cudaGraphNodeGetType hipGraphNodeGetType
+#define cudaGraphGetNodes hipGraphGetNodes
+#define cudaGraphExecUpdate hipGraphExecUpdate
+#define cudaStreamCaptureModeRelaxed hipStreamCaptureModeRelaxed
+#define cudaStreamBeginCapture hipStreamBeginCapture
+#define cudaGraph_t hipGraph_t
 #define cudaStream_t hipStream_t
 #define cudaSuccess hipSuccess
 #define __trap() do { abort(); __builtin_unreachable(); } while(0)
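These aliases let the cuda*-named graph code in ggml-cuda.cu compile unchanged for HIP; only hipGraphExecUpdate needs the explicit #ifdef branch shown earlier, because it takes an extra error-node parameter. For reference, a minimal stand-alone capture/instantiate/replay cycle in plain HIP looks like the sketch below (hypothetical kernel and sizes, not code from this commit):

    #include <hip/hip_runtime.h>
    #include <cstdio>
    #include <cstdlib>

    #define HIP_CHECK(fn) do { hipError_t e = (fn); if (e != hipSuccess) { \
        fprintf(stderr, "HIP error: %s\n", hipGetErrorString(e)); abort(); } } while (0)

    __global__ void scale(float * x, float s, int n) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) x[i] *= s;
    }

    int main() {
        const int n = 1 << 20;
        float * d_x = nullptr;
        HIP_CHECK(hipMalloc(&d_x, n * sizeof(float)));

        hipStream_t stream;
        HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));

        // capture the kernel launches into a graph instead of executing them
        HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeRelaxed));
        scale<<<(n + 255) / 256, 256, 0, stream>>>(d_x, 2.0f, n);
        scale<<<(n + 255) / 256, 256, 0, stream>>>(d_x, 0.5f, n);
        hipGraph_t graph;
        HIP_CHECK(hipStreamEndCapture(stream, &graph));

        // instantiate once, then replay cheaply as many times as needed
        hipGraphExec_t instance;
        HIP_CHECK(hipGraphInstantiate(&instance, graph, nullptr, nullptr, 0));
        for (int iter = 0; iter < 10; ++iter) {
            HIP_CHECK(hipGraphLaunch(instance, stream));
        }
        HIP_CHECK(hipStreamSynchronize(stream));

        HIP_CHECK(hipGraphExecDestroy(instance));
        HIP_CHECK(hipGraphDestroy(graph));
        HIP_CHECK(hipStreamDestroy(stream));
        HIP_CHECK(hipFree(d_x));
        return 0;
    }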
ggml/src/ggml-hip/CMakeLists.txt CHANGED
@@ -92,6 +92,14 @@ if (GGML_CUDA_NO_PEER_COPY)
     add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
 endif()
 
+if (GGML_HIP_GRAPHS)
+    add_compile_definitions(GGML_HIP_GRAPHS)
+endif()
+
+if (GGML_CUDA_NO_VMM)
+    add_compile_definitions(GGML_CUDA_NO_VMM)
+endif()
+
 if (CXX_IS_HIPCC)
     set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
     target_link_libraries(ggml-hip PRIVATE hip::device)
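These two blocks carry the CMake options into the compiler: GGML_HIP_GRAPHS feeds the USE_CUDA_GRAPH gate in common.cuh, and GGML_CUDA_NO_VMM now disables the VMM pool on HIP builds the same way it does on CUDA builds. A compile-time sanity check along these lines (hypothetical, not part of the tree) would confirm the plumbing in any translation unit that includes common.cuh:

    // hypothetical guard: if the option was requested but the gate in
    // common.cuh did not fire, fail the build early
    #if defined(GGML_HIP_GRAPHS) && !defined(USE_CUDA_GRAPH)
    #error "GGML_HIP_GRAPHS is set but USE_CUDA_GRAPH was not enabled by common.cuh"
    #endif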