ggerganov committed on
Commit
2bd85b6
·
1 Parent(s): 88e7829

metal : fix thread-safety (llama/14300)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-metal/ggml-metal.m +60 -28
ggml/src/ggml-metal/ggml-metal.m CHANGED
@@ -48,22 +48,28 @@ static struct ggml_backend_metal_device_context {
48
  int mtl_device_ref_count;
49
  id<MTLLibrary> mtl_library;
50
 
 
 
51
  bool has_simdgroup_reduction;
52
  bool has_simdgroup_mm;
53
  bool has_residency_sets;
54
  bool has_bfloat;
55
  bool use_bfloat;
56
 
 
 
57
  char name[128];
58
  } g_ggml_ctx_dev_main = {
59
  /*.mtl_device =*/ nil,
60
  /*.mtl_device_ref_count =*/ 0,
61
  /*.mtl_library =*/ nil,
 
62
  /*.has_simdgroup_reduction =*/ false,
63
  /*.has_simdgroup_mm =*/ false,
64
  /*.has_residency_sets =*/ false,
65
  /*.has_bfloat =*/ false,
66
  /*.use_bfloat =*/ false,
 
67
  /*.name =*/ "",
68
  };
69
 
@@ -71,6 +77,10 @@ static struct ggml_backend_metal_device_context {
71
  static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_device_context * ctx) {
72
  assert(ctx != NULL);
73
 
 
 
 
 
74
  if (ctx->mtl_device == nil) {
75
  ctx->mtl_device = MTLCreateSystemDefaultDevice();
76
  }
@@ -94,6 +104,8 @@ static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_dev
94
  ctx->use_bfloat = false;
95
  #endif
96
 
 
 
97
  strncpy(ctx->name, [[ctx->mtl_device name] UTF8String], sizeof(ctx->name) - 1);
98
  }
99
 
@@ -110,6 +122,11 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
110
  ctx->mtl_device_ref_count--;
111
 
112
  if (ctx->mtl_device_ref_count == 0) {
 
 
 
 
 
113
  if (ctx->mtl_library) {
114
  [ctx->mtl_library release];
115
  ctx->mtl_library = nil;
@@ -977,7 +994,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
977
  struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
978
  struct ggml_backend_metal_device_context * ctx_dev = dev->context;
979
 
980
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
981
 
982
  GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
983
 
@@ -991,9 +1008,16 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
991
  ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
992
 
993
  // load library
994
- if (ctx_dev->mtl_library == nil) {
995
- ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat);
 
 
 
 
 
 
996
  }
 
997
  id<MTLLibrary> metal_library = ctx_dev->mtl_library;
998
  if (metal_library == nil) {
999
  GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__);
@@ -5284,7 +5308,6 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
5284
  }
5285
 
5286
  ggml_backend_metal_buffer_rset_free(ctx);
5287
- ggml_backend_metal_device_rel(buffer->buft->device->context);
5288
 
5289
  if (ctx->owned) {
5290
  #if TARGET_OS_OSX
@@ -5393,7 +5416,10 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
5393
  }
5394
 
5395
  struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context;
5396
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
 
 
 
5397
 
5398
  ctx->all_data = ggml_metal_host_malloc(size_aligned);
5399
  ctx->all_size = size_aligned;
@@ -5416,14 +5442,12 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
5416
  if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
5417
  GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
5418
  free(ctx);
5419
- ggml_backend_metal_device_rel(ctx_dev);
5420
  return NULL;
5421
  }
5422
 
5423
  if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
5424
  GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
5425
  free(ctx);
5426
- ggml_backend_metal_device_rel(ctx_dev);
5427
  return NULL;
5428
  }
5429
 
@@ -5434,17 +5458,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
5434
 
5435
  static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
5436
  return 32;
 
5437
  GGML_UNUSED(buft);
5438
  }
5439
 
5440
  static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
5441
- id<MTLDevice> device = ggml_backend_metal_device_acq(buft->device->context);
5442
- const size_t max_size = device.maxBufferLength;
5443
- ggml_backend_metal_device_rel(buft->device->context);
5444
 
5445
  return max_size;
5446
-
5447
- GGML_UNUSED(buft);
5448
  }
5449
 
5450
  static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
@@ -5517,7 +5538,10 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
5517
  }
5518
 
5519
  struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main;
5520
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
 
 
 
5521
 
5522
  // the buffer fits into the max buffer size allowed by the device
5523
  if (size_aligned <= device.maxBufferLength) {
@@ -5573,7 +5597,6 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
5573
  if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
5574
  GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
5575
  free(ctx);
5576
- ggml_backend_metal_device_rel(ctx_dev);
5577
  return NULL;
5578
  }
5579
 
@@ -5589,10 +5612,8 @@ static const char * ggml_backend_metal_name(ggml_backend_t backend) {
5589
  }
5590
 
5591
  static void ggml_backend_metal_free(ggml_backend_t backend) {
5592
- struct ggml_backend_metal_context * ctx = backend->context;
5593
- struct ggml_backend_metal_device_context * ctx_dev = backend->device->context;
5594
 
5595
- ggml_backend_metal_device_rel(ctx_dev);
5596
  ggml_metal_free(ctx);
5597
 
5598
  free(backend);
@@ -5732,6 +5753,8 @@ bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family) {
5732
 
5733
  struct ggml_backend_metal_device_context * ctx_dev = backend->device->context;
5734
 
 
 
5735
  return [ctx_dev->mtl_device supportsFamily:(MTLGPUFamilyApple1 + family - 1)];
5736
  }
5737
 
@@ -5751,10 +5774,7 @@ static const char * ggml_backend_metal_device_get_name(ggml_backend_dev_t dev) {
5751
  }
5752
 
5753
  static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t dev) {
5754
- // acq/rel just to populate ctx->name in case it hasn't been done yet
5755
  struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
5756
- ggml_backend_metal_device_acq(ctx_dev);
5757
- ggml_backend_metal_device_rel(ctx_dev);
5758
 
5759
  return ctx_dev->name;
5760
  }
@@ -5762,12 +5782,10 @@ static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t
5762
  static void ggml_backend_metal_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
5763
  if (@available(macOS 10.12, iOS 16.0, *)) {
5764
  struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
5765
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
5766
 
5767
  *total = device.recommendedMaxWorkingSetSize;
5768
  *free = *total - device.currentAllocatedSize;
5769
-
5770
- ggml_backend_metal_device_rel(ctx_dev);
5771
  } else {
5772
  *free = 1;
5773
  *total = 1;
@@ -5845,7 +5863,10 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
5845
  }
5846
 
5847
  struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
5848
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
 
 
 
5849
 
5850
  // the buffer fits into the max buffer size allowed by the device
5851
  if (size_aligned <= device.maxBufferLength) {
@@ -5901,7 +5922,6 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
5901
  if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
5902
  GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
5903
  free(ctx);
5904
- ggml_backend_metal_device_rel(ctx_dev);
5905
  return NULL;
5906
  }
5907
 
@@ -5915,8 +5935,9 @@ static bool ggml_backend_metal_device_supports_op(ggml_backend_dev_t dev, const
5915
  }
5916
 
5917
  static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
5918
- return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name ||
5919
- buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
 
5920
 
5921
  GGML_UNUSED(dev);
5922
  }
@@ -6001,8 +6022,19 @@ static struct ggml_backend_reg_i ggml_backend_metal_reg_i = {
6001
  /* .get_proc_address = */ ggml_backend_metal_get_proc_address,
6002
  };
6003
 
 
 
 
 
 
 
6004
  ggml_backend_reg_t ggml_backend_metal_reg(void) {
6005
- // TODO: make this thread-safe somehow?
 
 
 
 
 
6006
  {
6007
  g_ggml_backend_metal_reg = (struct ggml_backend_reg) {
6008
  /* .api_version = */ GGML_BACKEND_API_VERSION,
 
48
  int mtl_device_ref_count;
49
  id<MTLLibrary> mtl_library;
50
 
51
+ NSLock * mtl_lock;
52
+
53
  bool has_simdgroup_reduction;
54
  bool has_simdgroup_mm;
55
  bool has_residency_sets;
56
  bool has_bfloat;
57
  bool use_bfloat;
58
 
59
+ size_t max_size;
60
+
61
  char name[128];
62
  } g_ggml_ctx_dev_main = {
63
  /*.mtl_device =*/ nil,
64
  /*.mtl_device_ref_count =*/ 0,
65
  /*.mtl_library =*/ nil,
66
+ /*.mtl_lock =*/ nil,
67
  /*.has_simdgroup_reduction =*/ false,
68
  /*.has_simdgroup_mm =*/ false,
69
  /*.has_residency_sets =*/ false,
70
  /*.has_bfloat =*/ false,
71
  /*.use_bfloat =*/ false,
72
+ /*.max_size =*/ 0,
73
  /*.name =*/ "",
74
  };
75
 
 
77
  static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_device_context * ctx) {
78
  assert(ctx != NULL);
79
 
80
+ if (ctx->mtl_lock == nil) {
81
+ ctx->mtl_lock = [[NSLock alloc] init];
82
+ }
83
+
84
  if (ctx->mtl_device == nil) {
85
  ctx->mtl_device = MTLCreateSystemDefaultDevice();
86
  }
 
104
  ctx->use_bfloat = false;
105
  #endif
106
 
107
+ ctx->max_size = ctx->mtl_device.maxBufferLength;
108
+
109
  strncpy(ctx->name, [[ctx->mtl_device name] UTF8String], sizeof(ctx->name) - 1);
110
  }
111
 
 
122
  ctx->mtl_device_ref_count--;
123
 
124
  if (ctx->mtl_device_ref_count == 0) {
125
+ if (ctx->mtl_lock) {
126
+ [ctx->mtl_lock release];
127
+ ctx->mtl_lock = nil;
128
+ }
129
+
130
  if (ctx->mtl_library) {
131
  [ctx->mtl_library release];
132
  ctx->mtl_library = nil;
 
994
  struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
995
  struct ggml_backend_metal_device_context * ctx_dev = dev->context;
996
 
997
+ id<MTLDevice> device = ctx_dev->mtl_device;
998
 
999
  GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
1000
 
 
1008
  ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
1009
 
1010
  // load library
1011
+ {
1012
+ [ctx_dev->mtl_lock lock];
1013
+
1014
+ if (ctx_dev->mtl_library == nil) {
1015
+ ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat);
1016
+ }
1017
+
1018
+ [ctx_dev->mtl_lock unlock];
1019
  }
1020
+
1021
  id<MTLLibrary> metal_library = ctx_dev->mtl_library;
1022
  if (metal_library == nil) {
1023
  GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__);
 
5308
  }
5309
 
5310
  ggml_backend_metal_buffer_rset_free(ctx);
 
5311
 
5312
  if (ctx->owned) {
5313
  #if TARGET_OS_OSX
 
5416
  }
5417
 
5418
  struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context;
5419
+
5420
+ GGML_ASSERT(ctx_dev->mtl_device != nil);
5421
+
5422
+ id<MTLDevice> device = ctx_dev->mtl_device;
5423
 
5424
  ctx->all_data = ggml_metal_host_malloc(size_aligned);
5425
  ctx->all_size = size_aligned;
 
5442
  if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
5443
  GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
5444
  free(ctx);
 
5445
  return NULL;
5446
  }
5447
 
5448
  if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
5449
  GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
5450
  free(ctx);
 
5451
  return NULL;
5452
  }
5453
 
 
5458
 
5459
  static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
5460
  return 32;
5461
+
5462
  GGML_UNUSED(buft);
5463
  }
5464
 
5465
  static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
5466
+ const size_t max_size = ((struct ggml_backend_metal_device_context *)buft->device->context)->max_size;
 
 
5467
 
5468
  return max_size;
 
 
5469
  }
5470
 
5471
  static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
 
5538
  }
5539
 
5540
  struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main;
5541
+
5542
+ GGML_ASSERT(ctx_dev->mtl_device != nil);
5543
+
5544
+ id<MTLDevice> device = ctx_dev->mtl_device;
5545
 
5546
  // the buffer fits into the max buffer size allowed by the device
5547
  if (size_aligned <= device.maxBufferLength) {
 
5597
  if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
5598
  GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
5599
  free(ctx);
 
5600
  return NULL;
5601
  }
5602
 
 
5612
  }
5613
 
5614
  static void ggml_backend_metal_free(ggml_backend_t backend) {
5615
+ struct ggml_backend_metal_context * ctx = backend->context;
 
5616
 
 
5617
  ggml_metal_free(ctx);
5618
 
5619
  free(backend);
 
5753
 
5754
  struct ggml_backend_metal_device_context * ctx_dev = backend->device->context;
5755
 
5756
+ GGML_ASSERT(ctx_dev->mtl_device != nil);
5757
+
5758
  return [ctx_dev->mtl_device supportsFamily:(MTLGPUFamilyApple1 + family - 1)];
5759
  }
5760
 
 
5774
  }
5775
 
5776
  static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t dev) {
 
5777
  struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
 
 
5778
 
5779
  return ctx_dev->name;
5780
  }
 
5782
  static void ggml_backend_metal_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
5783
  if (@available(macOS 10.12, iOS 16.0, *)) {
5784
  struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
5785
+ id<MTLDevice> device = ctx_dev->mtl_device;
5786
 
5787
  *total = device.recommendedMaxWorkingSetSize;
5788
  *free = *total - device.currentAllocatedSize;
 
 
5789
  } else {
5790
  *free = 1;
5791
  *total = 1;
 
5863
  }
5864
 
5865
  struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
5866
+
5867
+ GGML_ASSERT(ctx_dev->mtl_device != nil);
5868
+
5869
+ id<MTLDevice> device = ctx_dev->mtl_device;
5870
 
5871
  // the buffer fits into the max buffer size allowed by the device
5872
  if (size_aligned <= device.maxBufferLength) {
 
5922
  if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
5923
  GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
5924
  free(ctx);
 
5925
  return NULL;
5926
  }
5927
 
 
5935
  }
5936
 
5937
  static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
5938
+ return
5939
+ buft->iface.get_name == ggml_backend_metal_buffer_type_get_name ||
5940
+ buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
5941
 
5942
  GGML_UNUSED(dev);
5943
  }
 
6022
  /* .get_proc_address = */ ggml_backend_metal_get_proc_address,
6023
  };
6024
 
6025
+ // called upon program exit
6026
+ static void ggml_metal_cleanup(void) {
6027
+ ggml_backend_metal_device_rel(&g_ggml_ctx_dev_main);
6028
+ }
6029
+
6030
+ // TODO: make thread-safe
6031
  ggml_backend_reg_t ggml_backend_metal_reg(void) {
6032
+ ggml_backend_metal_device_acq(&g_ggml_ctx_dev_main);
6033
+
6034
+ // register cleanup callback
6035
+ // TODO: not ideal, but not sure if there is a better way to do this in Objective-C
6036
+ atexit(ggml_metal_cleanup);
6037
+
6038
  {
6039
  g_ggml_backend_metal_reg = (struct ggml_backend_reg) {
6040
  /* .api_version = */ GGML_BACKEND_API_VERSION,