Spaces:
Running
Running
Commit
·
c8284f2
1
Parent(s):
8fca6dd
ggml-rpc: chunk send()/recv() to avoid EINVAL for very large tensors over RPC (macOS & others) (llama/15188)
Browse files
* ggml-rpc: chunk send()/recv() to avoid EINVAL for very large tensors over RPC (macOS & others). Fixes #15055
* ggml-rpc: rename RPC_IO_CHUNK->MAX_CHUNK_SIZE, use std::min() for cap, switch to GGML_LOG_ERROR, handle 0-length send/recv
* rpc: drop n==0 special case in send_data(); retry in loop per review
* rpc: remove trailing whitespace in send_data()
---------
Co-authored-by: Shinnosuke Takagi <[email protected]>
ggml/src/ggml-rpc/ggml-rpc.cpp
CHANGED
|
@@ -29,9 +29,12 @@
|
|
| 29 |
#include <cstring>
|
| 30 |
#include <fstream>
|
| 31 |
#include <filesystem>
|
|
|
|
| 32 |
|
| 33 |
namespace fs = std::filesystem;
|
| 34 |
|
|
|
|
|
|
|
| 35 |
#ifdef _WIN32
|
| 36 |
typedef SOCKET sockfd_t;
|
| 37 |
using ssize_t = __int64;
|
|
@@ -323,11 +326,14 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
|
|
| 323 |
static bool send_data(sockfd_t sockfd, const void * data, size_t size) {
|
| 324 |
size_t bytes_sent = 0;
|
| 325 |
while (bytes_sent < size) {
|
| 326 |
-
|
|
|
|
| 327 |
if (n < 0) {
|
|
|
|
|
|
|
| 328 |
return false;
|
| 329 |
}
|
| 330 |
-
bytes_sent += n;
|
| 331 |
}
|
| 332 |
return true;
|
| 333 |
}
|
|
@@ -335,11 +341,18 @@ static bool send_data(sockfd_t sockfd, const void * data, size_t size) {
|
|
| 335 |
static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
|
| 336 |
size_t bytes_recv = 0;
|
| 337 |
while (bytes_recv < size) {
|
| 338 |
-
|
| 339 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
return false;
|
| 341 |
}
|
| 342 |
-
bytes_recv += n;
|
| 343 |
}
|
| 344 |
return true;
|
| 345 |
}
|
|
|
|
| 29 |
#include <cstring>
|
| 30 |
#include <fstream>
|
| 31 |
#include <filesystem>
|
| 32 |
+
#include <algorithm>
|
| 33 |
|
| 34 |
namespace fs = std::filesystem;
|
| 35 |
|
| 36 |
+
// Upper bound on the byte count handed to a single send()/recv() call.
// send_data() and recv_data() split larger transfers into pieces of at most
// this size.
static constexpr size_t MAX_CHUNK_SIZE = size_t{1} << 30; // 1 GiB
|
| 37 |
+
|
| 38 |
#ifdef _WIN32
|
| 39 |
typedef SOCKET sockfd_t;
|
| 40 |
using ssize_t = __int64;
|
|
|
|
| 326 |
// Writes exactly `size` bytes starting at `data` to the socket `sockfd`.
//
// The payload is passed to send() in pieces of at most MAX_CHUNK_SIZE bytes,
// and the loop continues until the running total reaches `size`.
//
// Returns true once all bytes have been accepted by send(); returns false
// (after logging) as soon as send() reports a negative result.
static bool send_data(sockfd_t sockfd, const void * data, size_t size) {
    const char * const src = static_cast<const char *>(data);
    for (size_t offset = 0; offset < size; ) {
        // Never request more than MAX_CHUNK_SIZE bytes in one call.
        const size_t chunk = std::min(size - offset, MAX_CHUNK_SIZE);
        const ssize_t written = send(sockfd, src + offset, chunk, 0);
        if (written < 0) {
            GGML_LOG_ERROR("send failed (bytes_sent=%zu, size_to_send=%zu)\n",
                           offset, chunk);
            return false;
        }
        // A zero result is not special-cased: the offset stays put and the
        // loop simply tries again.
        offset += static_cast<size_t>(written);
    }
    return true;
}
|
|
|
|
| 341 |
// Reads exactly `size` bytes from the socket `sockfd` into `data`.
//
// Each recv() call asks for at most MAX_CHUNK_SIZE bytes; the loop keeps
// going until the running total reaches `size`.
//
// Returns true when the full amount has been received. Returns false (after
// logging) if recv() reports a negative result, or if it returns 0 before
// `size` bytes have arrived.
static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
    char * const dst = static_cast<char *>(data);
    size_t received = 0;
    while (received < size) {
        // Never request more than MAX_CHUNK_SIZE bytes in one call.
        const size_t want = std::min(size - received, MAX_CHUNK_SIZE);
        const ssize_t got = recv(sockfd, dst + received, want, 0);
        if (got > 0) {
            received += static_cast<size_t>(got);
            continue;
        }
        // Anything non-positive ends the transfer; only the message differs.
        if (got < 0) {
            GGML_LOG_ERROR("recv failed (bytes_recv=%zu, size_to_recv=%zu)\n",
                           received, want);
        } else {
            GGML_LOG_ERROR("recv returned 0 (peer closed?)\n");
        }
        return false;
    }
    return true;
}
|