ruby : fix bindings (#2484)
* Improve Rakefile
* Remove intermediate files
* Remove unnecessary manipulations from extconf.rb
* Add README and LICENSE to source files
* Manage ext source files using YAML file
* Use extsources.yaml to include files into gem package file (see the sketch after this list)
* Add git-managed source files to build dependency
* Add test task
* Download model for test if not exists
* Add test for build
* Ignore gem package directory
* Enable GitHub action for Ruby binding
* Fix model name
* Build lib file for test
* Use extension for each platform
* Use extension for each platform on testing
* Move built lib file rather than copy
* Add intermediate files to clean targets
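The new bindings/ruby/extsources.yaml appears in the file summary below, but its contents are not part of this capture, so the mapping shown here is a hypothetical sketch. Based on how the updated Rakefile consumes the file (each key is a source directory, each value a list of destination paths inside bindings/ruby, and every pair becomes a copy rule), an equivalent standalone Ruby illustration could look like this:

    # Hypothetical sketch only -- the real extsources.yaml is not shown in this diff.
    # It mirrors the Rakefile logic: map a source directory to the files that get
    # copied into bindings/ruby before the extension is built.
    require "yaml"
    require "pathname"

    yaml = <<~YAML
      ../../src:
        - ext/whisper.cpp
      ../../include:
        - ext/whisper.h
      ../../examples:
        - ext/dr_wav.h
    YAML

    YAML.safe_load(yaml).each_pair do |src_dir, dests|
      dests.each do |dest|
        src = Pathname(src_dir) / File.basename(dest)
        # the Rakefile turns each pair into: file dest => src do |t| cp t.source, t.name end
        puts "cp #{src} #{dest}"
      end
    end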
- .github/workflows/bindings-ruby.yml +65 -0
- .github/workflows/bindings-ruby.yml.disabled +0 -23
- bindings/ruby/.gitignore +4 -0
- bindings/ruby/Rakefile +53 -10
- bindings/ruby/ext/.gitignore +20 -1
- bindings/ruby/ext/extconf.rb +0 -17
- bindings/ruby/ext/ggml-backend-impl.h +0 -141
- bindings/ruby/ext/ggml-backend.c +0 -2095
- bindings/ruby/ext/ggml-backend.h +0 -233
- bindings/ruby/ext/ggml-common.h +0 -0
- bindings/ruby/ext/ggml-cuda.h +0 -43
- bindings/ruby/ext/ggml-impl.h +0 -272
- bindings/ruby/ext/ggml-kompute.h +0 -46
- bindings/ruby/ext/ggml-metal.h +0 -66
- bindings/ruby/ext/ggml-opencl.h +0 -36
- bindings/ruby/ext/ggml-quants.c +0 -0
- bindings/ruby/ext/ggml-quants.h +0 -133
- bindings/ruby/ext/ggml-sycl.h +0 -49
- bindings/ruby/ext/ggml-vulkan.h +0 -29
- bindings/ruby/extsources.yaml +32 -0
- bindings/ruby/tests/test_whisper.rb +24 -4
- bindings/ruby/whispercpp.gemspec +3 -3
.github/workflows/bindings-ruby.yml
ADDED
@@ -0,0 +1,65 @@
+name: Bindings Tests (Ruby)
+on:
+  push:
+    paths:
+      - bindings/ruby/**
+      - src/whisper.cpp
+      - include/whisper.h
+      - ggml/src/ggml.c
+      - ggml/src/ggml-impl.h
+      - ggml/src/ggml-aarch64.h
+      - ggml/src/ggml-aarch64.c
+      - ggml/src/ggml-alloc.c
+      - ggml/src/ggml-backend-impl.h
+      - ggml/src/ggml-backend.cpp
+      - ggml/src/ggml-common.h
+      - ggml/src/ggml-quants.h
+      - ggml/src/ggml-quants.c
+      - ggml/src/ggml-cpu-impl.h
+      - ggml/include/ggml.h
+      - ggml/include/ggml-alloc.h
+      - ggml/include/ggml-backend.h
+      - ggml/include/ggml-cuda.h
+      - ggml/include/ggml-kompute.h
+      - ggml/include/ggml-metal.h
+      - ggml/include/ggml-sycl.h
+      - ggml/include/ggml-vulkan.h
+      - examples/dr_wav.h
+  pull_request:
+    paths:
+      - bindings/ruby/**
+      - src/whisper.cpp
+      - include/whisper.h
+      - ggml/src/ggml.c
+      - ggml/src/ggml-impl.h
+      - ggml/src/ggml-aarch64.h
+      - ggml/src/ggml-aarch64.c
+      - ggml/src/ggml-alloc.c
+      - ggml/src/ggml-backend-impl.h
+      - ggml/src/ggml-backend.cpp
+      - ggml/src/ggml-common.h
+      - ggml/src/ggml-quants.h
+      - ggml/src/ggml-quants.c
+      - ggml/src/ggml-cpu-impl.h
+      - ggml/include/ggml.h
+      - ggml/include/ggml-alloc.h
+      - ggml/include/ggml-backend.h
+      - ggml/include/ggml-cuda.h
+      - ggml/include/ggml-kompute.h
+      - ggml/include/ggml-metal.h
+      - ggml/include/ggml-sycl.h
+      - ggml/include/ggml-vulkan.h
+      - examples/dr_wav.h
+
+jobs:
+  ubuntu-latest:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: bindings/ruby
+    steps:
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '3.0'
+      - uses: actions/checkout@v4
+      - run: rake test
.github/workflows/bindings-ruby.yml.disabled
DELETED
@@ -1,23 +0,0 @@
-# TODO: fix this workflow file, disabled for now
-name: Bindings Tests (Ruby)
-on:
-  push:
-    paths:
-      - bindings/ruby/**
-      - whisper.h
-  pull_request:
-    paths:
-      - bindings/ruby/**
-      - whisper.h
-
-jobs:
-  ubuntu-latest:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: ruby/setup-ruby@v1
-        with:
-          ruby-version: '3.0'
-      - uses: actions/checkout@v1
-      - run: |
-          cd bindings/ruby/ext
-          ruby extconf.rb && make
bindings/ruby/.gitignore
ADDED
@@ -0,0 +1,4 @@
+README.md
+LICENSE
+pkg/
+lib/whisper.*
bindings/ruby/Rakefile
CHANGED
@@ -1,12 +1,55 @@
 require 'rake/clean'
+require "bundler/gem_tasks"
+require "pathname"
+require "yaml"
+require "rake/testtask"
+
+extsources = YAML.load_file("extsources.yaml")
+extsources.each_pair do |src_dir, dests|
+  dests.each do |dest|
+    src = Pathname(src_dir)/File.basename(dest)
+
+    file src
+    file dest => src do |t|
+      cp t.source, t.name
+    end
+  end
+end
+SOURCES = extsources.values.flatten
+CLEAN.include SOURCES
+CLEAN.include FileList["ext/*.o", "ext/whisper.so", "ext/whisper.bundle", "ext/whisper.dll"]
+
+task build: SOURCES + FileList[
+  "ext/extconf.rb",
+  "ext/ruby_whisper.h",
+  "ext/ruby_whisper.cpp",
+  "whispercpp.gemspec",
+]
+
+directory "pkg"
+CLOBBER.include "pkg"
+
+TEST_MODEL = "../../models/ggml-base.en.bin"
+LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
+LIB_FILE = File.join("lib", LIB_NAME)
+
+directory "lib"
+task LIB_FILE => SOURCES + ["lib"] do |t|
+  Dir.chdir "ext" do
+    sh "ruby extconf.rb"
+    sh "make"
+  end
+  mv "ext/#{LIB_NAME}", t.name
+end
+CLEAN.include LIB_FILE
+
+Rake::TestTask.new do |t|
+  t.test_files = FileList["tests/test_*.rb"]
+end
+task test: [TEST_MODEL, LIB_FILE]
+
+file TEST_MODEL do
+  Dir.chdir "../.." do
+    sh "./models/download-ggml-model.sh base.en"
+  end
+end
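A note on the LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"]) line above, which is what the "Use extension for each platform" commit bullets refer to: the built extension takes the dynamic-library suffix of the running Ruby instead of a hard-coded .so. A minimal standalone sketch of the same idea (the values named in the comment are typical, not guaranteed; DLEXT depends on how Ruby was built):

    # Sketch of the platform-specific naming used by the Rakefile above.
    # RbConfig::CONFIG["DLEXT"] is usually "so" on Linux and "bundle" on macOS.
    require "rbconfig"

    lib_name = "whisper.#{RbConfig::CONFIG['DLEXT']}"
    lib_file = File.join("lib", lib_name)
    puts lib_file   # e.g. "lib/whisper.so" or "lib/whisper.bundle"

This is also why the Rakefile's clean list and the ext/.gitignore below cover all three candidates: whisper.so, whisper.bundle, and whisper.dll.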
bindings/ruby/ext/.gitignore
CHANGED
@@ -3,7 +3,26 @@ ggml.c
 ggml.h
 ggml-alloc.c
 ggml-alloc.h
+ggml-aarch64.c
+ggml-aarch64.h
+ggml-backend.cpp
+ggml-backend-impl.h
+ggml-backend.c
+ggml-backend.h
+ggml-common.h
+ggml-cpu-impl.h
+ggml-cuda.h
+ggml-impl.h
+ggml-kompute.h
+ggml-metal.h
+ggml-opencl.h
+ggml-quants.c
+ggml-quants.h
+ggml-sycl.h
+ggml-vulkan.h
 whisper.cpp
 whisper.h
 dr_wav.h
+whisper.bundle
+whisper.so
+whisper.dll
bindings/ruby/ext/extconf.rb
CHANGED
@@ -1,21 +1,4 @@
 require 'mkmf'
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.cpp')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-aarch64.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-aarch64.c')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.cpp')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-common.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.c')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")
-
 
 # need to use c++ compiler flags
 $CXXFLAGS << ' -std=c++11'
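Only the head of the trimmed extconf.rb appears in this hunk; whatever follows (mkmf scripts conventionally end with a create_makefile call that the Rakefile's "ruby extconf.rb" + "make" steps rely on) is outside the captured lines. A hypothetical minimal shape, with the target name assumed rather than taken from the diff:

    # Hypothetical minimal extconf.rb; the real file's remaining lines are not
    # shown in this diff, so the create_makefile target below is an assumption.
    require 'mkmf'

    # need to use c++ compiler flags
    $CXXFLAGS << ' -std=c++11'

    create_makefile('whisper')   # emits the Makefile that the Rakefile's `make` step builds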
bindings/ruby/ext/ggml-backend-impl.h
DELETED
@@ -1,141 +0,0 @@
(Entire file removed: the 141-line copy of the ggml backend internal header that had been checked in under bindings/ruby/ext.)
bindings/ruby/ext/ggml-backend.c
DELETED
@@ -1,2095 +0,0 @@
(Entire file removed: the 2095-line copy of the ggml backend implementation that had been checked in under bindings/ruby/ext.)
|
| 983 |
-
return buffer->iface.get_name == ggml_backend_multi_buffer_get_name;
|
| 984 |
-
}
|
| 985 |
-
|
| 986 |
-
GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) {
|
| 987 |
-
GGML_ASSERT(ggml_backend_buffer_is_multi_buffer(buffer));
|
| 988 |
-
ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context;
|
| 989 |
-
for (size_t i = 0; i < ctx->n_buffers; i++) {
|
| 990 |
-
ggml_backend_buffer_set_usage(ctx->buffers[i], usage);
|
| 991 |
-
}
|
| 992 |
-
}
|
| 993 |
-
|
| 994 |
-
// creates a copy of the tensor with the same memory layout
|
| 995 |
-
static struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) {
|
| 996 |
-
struct ggml_tensor * dup = ggml_dup_tensor(ctx, tensor);
|
| 997 |
-
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
| 998 |
-
dup->nb[i] = tensor->nb[i];
|
| 999 |
-
}
|
| 1000 |
-
return dup;
|
| 1001 |
-
}
|
| 1002 |
-
|
| 1003 |
-
static bool ggml_is_view_op(enum ggml_op op) {
|
| 1004 |
-
return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
|
| 1005 |
-
}
|
| 1006 |
-
|
| 1007 |
-
// scheduler
|
| 1008 |
-
|
| 1009 |
-
#ifndef GGML_SCHED_MAX_BACKENDS
|
| 1010 |
-
#define GGML_SCHED_MAX_BACKENDS 16
|
| 1011 |
-
#endif
|
| 1012 |
-
|
| 1013 |
-
#ifndef GGML_SCHED_MAX_SPLITS
|
| 1014 |
-
#define GGML_SCHED_MAX_SPLITS 2048
|
| 1015 |
-
#endif
|
| 1016 |
-
|
| 1017 |
-
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
|
| 1018 |
-
#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
|
| 1019 |
-
#endif
|
| 1020 |
-
|
| 1021 |
-
#ifndef GGML_SCHED_MAX_COPIES
|
| 1022 |
-
#define GGML_SCHED_MAX_COPIES 4
|
| 1023 |
-
#endif
|
| 1024 |
-
|
| 1025 |
-
struct ggml_backend_sched_split {
|
| 1026 |
-
int backend_id;
|
| 1027 |
-
int i_start;
|
| 1028 |
-
int i_end;
|
| 1029 |
-
struct ggml_tensor * inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
|
| 1030 |
-
int n_inputs;
|
| 1031 |
-
// graph view of this split
|
| 1032 |
-
struct ggml_cgraph graph;
|
| 1033 |
-
};
|
| 1034 |
-
|
| 1035 |
-
struct ggml_backend_sched {
|
| 1036 |
-
bool is_reset; // true if the scheduler has been reset since the last graph split
|
| 1037 |
-
bool is_alloc;
|
| 1038 |
-
|
| 1039 |
-
int n_backends;
|
| 1040 |
-
|
| 1041 |
-
ggml_backend_t backends[GGML_SCHED_MAX_BACKENDS];
|
| 1042 |
-
ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
|
| 1043 |
-
ggml_gallocr_t galloc;
|
| 1044 |
-
|
| 1045 |
-
// hash keys of the nodes in the graph
|
| 1046 |
-
struct ggml_hash_set hash_set;
|
| 1047 |
-
// hash values
|
| 1048 |
-
int * tensor_backend_id;
|
| 1049 |
-
struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
|
| 1050 |
-
|
| 1051 |
-
int * node_backend_ids; // [graph_size]
|
| 1052 |
-
int * leaf_backend_ids; // [graph_size]
|
| 1053 |
-
|
| 1054 |
-
// copy of the graph with modified inputs
|
| 1055 |
-
struct ggml_cgraph * graph;
|
| 1056 |
-
|
| 1057 |
-
// graph splits
|
| 1058 |
-
struct ggml_backend_sched_split * splits;
|
| 1059 |
-
int n_splits;
|
| 1060 |
-
int splits_capacity;
|
| 1061 |
-
|
| 1062 |
-
// pipeline parallelism support
|
| 1063 |
-
int n_copies;
|
| 1064 |
-
int cur_copy;
|
| 1065 |
-
ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
|
| 1066 |
-
struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
|
| 1067 |
-
int n_graph_inputs;
|
| 1068 |
-
|
| 1069 |
-
struct ggml_context * ctx;
|
| 1070 |
-
|
| 1071 |
-
ggml_backend_sched_eval_callback callback_eval;
|
| 1072 |
-
void * callback_eval_user_data;
|
| 1073 |
-
|
| 1074 |
-
// align context_buffer to GGML_MEM_ALIGN
|
| 1075 |
-
#ifdef _MSC_VER
|
| 1076 |
-
__declspec(align(GGML_MEM_ALIGN))
|
| 1077 |
-
#else
|
| 1078 |
-
__attribute__((aligned(GGML_MEM_ALIGN)))
|
| 1079 |
-
#endif
|
| 1080 |
-
char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
|
| 1081 |
-
};
|
| 1082 |
-
|
| 1083 |
-
#define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor)
|
| 1084 |
-
#define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)]
|
| 1085 |
-
|
| 1086 |
-
// returns the priority of the backend, lower id is higher priority
|
| 1087 |
-
static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
| 1088 |
-
for (int i = 0; i < sched->n_backends; i++) {
|
| 1089 |
-
if (sched->backends[i] == backend) {
|
| 1090 |
-
return i;
|
| 1091 |
-
}
|
| 1092 |
-
}
|
| 1093 |
-
return -1;
|
| 1094 |
-
}
|
| 1095 |
-
|
| 1096 |
-
static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor) {
|
| 1097 |
-
ggml_backend_buffer_t buffer = tensor->buffer;
|
| 1098 |
-
if (buffer == NULL) {
|
| 1099 |
-
return -1;
|
| 1100 |
-
}
|
| 1101 |
-
|
| 1102 |
-
// find highest prio backend that supports the buffer type
|
| 1103 |
-
for (int i = 0; i < sched->n_backends; i++) {
|
| 1104 |
-
if (ggml_backend_buft_supports_backend(buffer->buft, sched->backends[i])) {
|
| 1105 |
-
return i;
|
| 1106 |
-
}
|
| 1107 |
-
}
|
| 1108 |
-
|
| 1109 |
-
fprintf(stderr, "%s: error: no backend supports buffer type %s used in tensor %s\n",
|
| 1110 |
-
__func__, ggml_backend_buffer_name(buffer), tensor->name);
|
| 1111 |
-
GGML_ASSERT(false);
|
| 1112 |
-
|
| 1113 |
-
return -1;
|
| 1114 |
-
}
|
| 1115 |
-
|
| 1116 |
-
#if 0
|
| 1117 |
-
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
|
| 1118 |
-
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
|
| 1119 |
-
#define GET_CAUSE(node) causes[hash_id(node)]
|
| 1120 |
-
#else
|
| 1121 |
-
#define SET_CAUSE(node, ...)
|
| 1122 |
-
#define GET_CAUSE(node) ""
|
| 1123 |
-
#endif
|
| 1124 |
-
|
| 1125 |
-
// returns the backend that should be used for the node based on the current locations
|
| 1126 |
-
static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
|
| 1127 |
-
// TODO: use supports_op to check if the backend supports the op
|
| 1128 |
-
|
| 1129 |
-
// assign pre-allocated nodes to their backend
|
| 1130 |
-
int cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor);
|
| 1131 |
-
if (cur_backend_id != -1) {
|
| 1132 |
-
SET_CAUSE(tensor, "1.dst");
|
| 1133 |
-
return cur_backend_id;
|
| 1134 |
-
}
|
| 1135 |
-
|
| 1136 |
-
// view_src
|
| 1137 |
-
if (tensor->view_src != NULL) {
|
| 1138 |
-
cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src);
|
| 1139 |
-
if (cur_backend_id != -1) {
|
| 1140 |
-
SET_CAUSE(tensor, "1.vsrc");
|
| 1141 |
-
return cur_backend_id;
|
| 1142 |
-
}
|
| 1143 |
-
}
|
| 1144 |
-
|
| 1145 |
-
// graph input
|
| 1146 |
-
if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
|
| 1147 |
-
cur_backend_id = sched->n_backends - 1; // last backend (assumed CPU)
|
| 1148 |
-
SET_CAUSE(tensor, "1.inp");
|
| 1149 |
-
return cur_backend_id;
|
| 1150 |
-
}
|
| 1151 |
-
|
| 1152 |
-
// assign nodes that use weights to the backend of the weights
|
| 1153 |
-
// operations with weights are preferably run on the same backend as the weights
|
| 1154 |
-
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
| 1155 |
-
const struct ggml_tensor * src = tensor->src[i];
|
| 1156 |
-
if (src == NULL) {
|
| 1157 |
-
continue;
|
| 1158 |
-
}
|
| 1159 |
-
if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
|
| 1160 |
-
int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src);
|
| 1161 |
-
// check if a backend with higher prio wants to offload the op
|
| 1162 |
-
if (src_backend_id == sched->n_backends - 1) {
|
| 1163 |
-
for (int b = 0; b < src_backend_id; b++) {
|
| 1164 |
-
if (ggml_backend_offload_op(sched->backends[b], tensor)) {
|
| 1165 |
-
SET_CAUSE(tensor, "1.off");
|
| 1166 |
-
return b;
|
| 1167 |
-
}
|
| 1168 |
-
}
|
| 1169 |
-
}
|
| 1170 |
-
SET_CAUSE(tensor, "1.wgt%d", i);
|
| 1171 |
-
return src_backend_id;
|
| 1172 |
-
}
|
| 1173 |
-
}
|
| 1174 |
-
|
| 1175 |
-
return -1;
|
| 1176 |
-
}
|
| 1177 |
-
|
| 1178 |
-
static char * fmt_size(size_t size) {
|
| 1179 |
-
static char buffer[128];
|
| 1180 |
-
if (size >= 1024*1024) {
|
| 1181 |
-
sprintf(buffer, "%zuM", size/1024/1024);
|
| 1182 |
-
} else {
|
| 1183 |
-
sprintf(buffer, "%zuK", size/1024);
|
| 1184 |
-
}
|
| 1185 |
-
return buffer;
|
| 1186 |
-
}
|
| 1187 |
-
|
| 1188 |
-
static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
| 1189 |
-
int cur_split = 0;
|
| 1190 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 1191 |
-
if (cur_split < sched->n_splits && i == sched->splits[cur_split].i_start) {
|
| 1192 |
-
ggml_backend_t split_backend = sched->backends[sched->splits[cur_split].backend_id];
|
| 1193 |
-
fprintf(stderr, "\n## SPLIT #%d: %s # %d inputs: ", cur_split, ggml_backend_name(split_backend),
|
| 1194 |
-
sched->splits[cur_split].n_inputs);
|
| 1195 |
-
for (int j = 0; j < sched->splits[cur_split].n_inputs; j++) {
|
| 1196 |
-
fprintf(stderr, "[%s (%5.5s)] ", sched->splits[cur_split].inputs[j]->name,
|
| 1197 |
-
fmt_size(ggml_nbytes(sched->splits[cur_split].inputs[j])));
|
| 1198 |
-
}
|
| 1199 |
-
fprintf(stderr, "\n");
|
| 1200 |
-
cur_split++;
|
| 1201 |
-
}
|
| 1202 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1203 |
-
if (ggml_is_view_op(node->op)) {
|
| 1204 |
-
continue;
|
| 1205 |
-
}
|
| 1206 |
-
ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
| 1207 |
-
fprintf(stderr, "node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
|
| 1208 |
-
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node));
|
| 1209 |
-
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 1210 |
-
struct ggml_tensor * src = node->src[j];
|
| 1211 |
-
if (src == NULL) {
|
| 1212 |
-
continue;
|
| 1213 |
-
}
|
| 1214 |
-
ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src);
|
| 1215 |
-
fprintf(stderr, " %20.20s (%5.5s) [%5.5s %8.8s]", src->name,
|
| 1216 |
-
fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src));
|
| 1217 |
-
}
|
| 1218 |
-
fprintf(stderr, "\n");
|
| 1219 |
-
}
|
| 1220 |
-
}
|
| 1221 |
-
|
| 1222 |
-
//#define DEBUG_PASS1
|
| 1223 |
-
//#define DEBUG_PASS2
|
| 1224 |
-
//#define DEBUG_PASS3
|
| 1225 |
-
//#define DEBUG_PASS4
|
| 1226 |
-
|
| 1227 |
-
// assigns backends to ops and splits the graph into subgraphs that can be computed on the same backend
|
| 1228 |
-
static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
| 1229 |
-
// reset splits
|
| 1230 |
-
sched->n_splits = 0;
|
| 1231 |
-
sched->n_graph_inputs = 0;
|
| 1232 |
-
sched->is_reset = false;
|
| 1233 |
-
|
| 1234 |
-
struct ggml_init_params params = {
|
| 1235 |
-
/* .mem_size = */ sizeof(sched->context_buffer),
|
| 1236 |
-
/* .mem_buffer = */ sched->context_buffer,
|
| 1237 |
-
/* .no_alloc = */ true
|
| 1238 |
-
};
|
| 1239 |
-
|
| 1240 |
-
ggml_free(sched->ctx);
|
| 1241 |
-
|
| 1242 |
-
sched->ctx = ggml_init(params);
|
| 1243 |
-
if (sched->ctx == NULL) {
|
| 1244 |
-
fprintf(stderr, "%s: failed to initialize context\n", __func__);
|
| 1245 |
-
GGML_ASSERT(false);
|
| 1246 |
-
}
|
| 1247 |
-
|
| 1248 |
-
// pass 1: assign backends to ops with pre-allocated inputs
|
| 1249 |
-
for (int i = 0; i < graph->n_leafs; i++) {
|
| 1250 |
-
struct ggml_tensor * leaf = graph->leafs[i];
|
| 1251 |
-
int * leaf_backend_id = &tensor_backend_id(leaf);
|
| 1252 |
-
if (*leaf_backend_id != -1) {
|
| 1253 |
-
// do not overwrite user assignments
|
| 1254 |
-
continue;
|
| 1255 |
-
}
|
| 1256 |
-
*leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
|
| 1257 |
-
}
|
| 1258 |
-
|
| 1259 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 1260 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1261 |
-
int * node_backend_id = &tensor_backend_id(node);
|
| 1262 |
-
if (*node_backend_id != -1) {
|
| 1263 |
-
// do not overwrite user assignments
|
| 1264 |
-
continue;
|
| 1265 |
-
}
|
| 1266 |
-
*node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
|
| 1267 |
-
// src
|
| 1268 |
-
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 1269 |
-
struct ggml_tensor * src = node->src[j];
|
| 1270 |
-
if (src == NULL) {
|
| 1271 |
-
continue;
|
| 1272 |
-
}
|
| 1273 |
-
int * src_backend_id = &tensor_backend_id(src);
|
| 1274 |
-
if (*src_backend_id == -1) {
|
| 1275 |
-
*src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src);
|
| 1276 |
-
}
|
| 1277 |
-
}
|
| 1278 |
-
}
|
| 1279 |
-
#ifdef DEBUG_PASS1
|
| 1280 |
-
fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
|
| 1281 |
-
#endif
|
| 1282 |
-
|
| 1283 |
-
// pass 2: expand current backend assignments
|
| 1284 |
-
// assign the same backend to adjacent nodes
|
| 1285 |
-
// expand gpu backends (i.e. non last prio) up and down, ignoring cpu (the lowest priority backend)
|
| 1286 |
-
// thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
// pass 2.2 expand gpu down
|
| 1290 |
-
{
|
| 1291 |
-
int cur_backend_id = -1;
|
| 1292 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 1293 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1294 |
-
if (ggml_is_view_op(node->op)) {
|
| 1295 |
-
continue;
|
| 1296 |
-
}
|
| 1297 |
-
int * node_backend_id = &tensor_backend_id(node);
|
| 1298 |
-
if (*node_backend_id != -1) {
|
| 1299 |
-
if (*node_backend_id == sched->n_backends - 1) {
|
| 1300 |
-
// skip cpu (lowest prio backend)
|
| 1301 |
-
cur_backend_id = -1;
|
| 1302 |
-
} else {
|
| 1303 |
-
cur_backend_id = *node_backend_id;
|
| 1304 |
-
}
|
| 1305 |
-
} else {
|
| 1306 |
-
*node_backend_id = cur_backend_id;
|
| 1307 |
-
SET_CAUSE(node, "2.2");
|
| 1308 |
-
}
|
| 1309 |
-
}
|
| 1310 |
-
}
|
| 1311 |
-
// pass 2.1 expand gpu up
|
| 1312 |
-
{
|
| 1313 |
-
int cur_backend_id = -1;
|
| 1314 |
-
for (int i = graph->n_nodes - 1; i >= 0; i--) {
|
| 1315 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1316 |
-
if (ggml_is_view_op(node->op)) {
|
| 1317 |
-
continue;
|
| 1318 |
-
}
|
| 1319 |
-
int * node_backend_id = &tensor_backend_id(node);
|
| 1320 |
-
if (*node_backend_id != -1) {
|
| 1321 |
-
if (*node_backend_id == sched->n_backends - 1) {
|
| 1322 |
-
// skip cpu (lowest prio backend)
|
| 1323 |
-
cur_backend_id = -1;
|
| 1324 |
-
} else {
|
| 1325 |
-
cur_backend_id = *node_backend_id;
|
| 1326 |
-
}
|
| 1327 |
-
} else {
|
| 1328 |
-
*node_backend_id = cur_backend_id;
|
| 1329 |
-
SET_CAUSE(node, "2.1");
|
| 1330 |
-
}
|
| 1331 |
-
}
|
| 1332 |
-
}
|
| 1333 |
-
// pass 2.4 expand rest down
|
| 1334 |
-
{
|
| 1335 |
-
int cur_backend_id = -1;
|
| 1336 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 1337 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1338 |
-
if (ggml_is_view_op(node->op)) {
|
| 1339 |
-
continue;
|
| 1340 |
-
}
|
| 1341 |
-
int * node_backend_id = &tensor_backend_id(node);
|
| 1342 |
-
if (*node_backend_id != -1) {
|
| 1343 |
-
cur_backend_id = *node_backend_id;
|
| 1344 |
-
} else {
|
| 1345 |
-
*node_backend_id = cur_backend_id;
|
| 1346 |
-
SET_CAUSE(node, "2.4");
|
| 1347 |
-
}
|
| 1348 |
-
}
|
| 1349 |
-
}
|
| 1350 |
-
// pass 2.3 expand rest up
|
| 1351 |
-
{
|
| 1352 |
-
int cur_backend_id = -1;
|
| 1353 |
-
for (int i = graph->n_nodes - 1; i >= 0; i--) {
|
| 1354 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1355 |
-
if (ggml_is_view_op(node->op)) {
|
| 1356 |
-
continue;
|
| 1357 |
-
}
|
| 1358 |
-
int * node_backend_id = &tensor_backend_id(node);
|
| 1359 |
-
if (*node_backend_id != -1) {
|
| 1360 |
-
cur_backend_id = *node_backend_id;
|
| 1361 |
-
} else {
|
| 1362 |
-
*node_backend_id = cur_backend_id;
|
| 1363 |
-
SET_CAUSE(node, "2.3");
|
| 1364 |
-
}
|
| 1365 |
-
}
|
| 1366 |
-
}
|
| 1367 |
-
|
| 1368 |
-
#ifdef DEBUG_PASS2
|
| 1369 |
-
fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
|
| 1370 |
-
#endif
|
| 1371 |
-
|
| 1372 |
-
// pass 3: assign backends to remaining src from dst and view_src
|
| 1373 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 1374 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1375 |
-
int * cur_backend_id = &tensor_backend_id(node);
|
| 1376 |
-
if (node->view_src != NULL && *cur_backend_id == -1) {
|
| 1377 |
-
*cur_backend_id = tensor_backend_id(node->view_src);
|
| 1378 |
-
SET_CAUSE(node, "3.vsrc");
|
| 1379 |
-
}
|
| 1380 |
-
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 1381 |
-
struct ggml_tensor * src = node->src[j];
|
| 1382 |
-
if (src == NULL) {
|
| 1383 |
-
continue;
|
| 1384 |
-
}
|
| 1385 |
-
int * src_backend_id = &tensor_backend_id(src);
|
| 1386 |
-
if (*src_backend_id == -1) {
|
| 1387 |
-
if (src->view_src != NULL) {
|
| 1388 |
-
// views are always on the same backend as the source
|
| 1389 |
-
*src_backend_id = tensor_backend_id(src->view_src);
|
| 1390 |
-
SET_CAUSE(src, "3.vsrc");
|
| 1391 |
-
} else {
|
| 1392 |
-
*src_backend_id = *cur_backend_id;
|
| 1393 |
-
SET_CAUSE(src, "3.cur");
|
| 1394 |
-
}
|
| 1395 |
-
}
|
| 1396 |
-
}
|
| 1397 |
-
}
|
| 1398 |
-
#ifdef DEBUG_PASS3
|
| 1399 |
-
fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
|
| 1400 |
-
#endif
|
| 1401 |
-
|
| 1402 |
-
// pass 4: split graph, find tensors that need to be copied
|
| 1403 |
-
{
|
| 1404 |
-
int i_split = 0;
|
| 1405 |
-
struct ggml_backend_sched_split * split = &sched->splits[0];
|
| 1406 |
-
// find the backend of the first split, skipping view ops
|
| 1407 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 1408 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1409 |
-
if (!ggml_is_view_op(node->op)) {
|
| 1410 |
-
split->backend_id = tensor_backend_id(node);
|
| 1411 |
-
break;
|
| 1412 |
-
}
|
| 1413 |
-
}
|
| 1414 |
-
split->i_start = 0;
|
| 1415 |
-
split->n_inputs = 0;
|
| 1416 |
-
memset(split->inputs, 0, sizeof(split->inputs)); //HACK
|
| 1417 |
-
int cur_backend_id = split->backend_id;
|
| 1418 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 1419 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 1420 |
-
|
| 1421 |
-
if (ggml_is_view_op(node->op)) {
|
| 1422 |
-
continue;
|
| 1423 |
-
}
|
| 1424 |
-
|
| 1425 |
-
const int node_backend_id = tensor_backend_id(node);
|
| 1426 |
-
|
| 1427 |
-
GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now
|
| 1428 |
-
|
| 1429 |
-
// check if we should start a new split based on the sources of the current node
|
| 1430 |
-
bool need_new_split = false;
|
| 1431 |
-
if (node_backend_id == cur_backend_id && split->n_inputs > 0) {
|
| 1432 |
-
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 1433 |
-
struct ggml_tensor * src = node->src[j];
|
| 1434 |
-
if (src == NULL) {
|
| 1435 |
-
continue;
|
| 1436 |
-
}
|
| 1437 |
-
// check if a weight is on a different backend
|
| 1438 |
-
// by starting a new split, the memory of the previously offloaded weights can be reused
|
| 1439 |
-
if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
|
| 1440 |
-
int src_backend_id = tensor_backend_id(src);
|
| 1441 |
-
if (src_backend_id != -1 && src_backend_id != cur_backend_id) {
|
| 1442 |
-
need_new_split = true;
|
| 1443 |
-
break;
|
| 1444 |
-
}
|
| 1445 |
-
}
|
| 1446 |
-
// check if the split has too many inputs
|
| 1447 |
-
if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) {
|
| 1448 |
-
const size_t id = hash_id(src);
|
| 1449 |
-
int src_backend_id = sched->tensor_backend_id[id];
|
| 1450 |
-
if (src_backend_id != cur_backend_id && sched->tensor_copies[hash_id(src)][cur_backend_id][0] == NULL) {
|
| 1451 |
-
//printf("starting new split because of too many inputs: node %s, input %s\n", node->name, src->name);
|
| 1452 |
-
need_new_split = true;
|
| 1453 |
-
break;
|
| 1454 |
-
}
|
| 1455 |
-
}
|
| 1456 |
-
}
|
| 1457 |
-
}
|
| 1458 |
-
|
| 1459 |
-
if (node_backend_id != cur_backend_id || need_new_split) {
|
| 1460 |
-
split->i_end = i;
|
| 1461 |
-
i_split++;
|
| 1462 |
-
if (i_split >= sched->splits_capacity) {
|
| 1463 |
-
sched->splits_capacity *= 2;
|
| 1464 |
-
sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
|
| 1465 |
-
GGML_ASSERT(sched->splits != NULL);
|
| 1466 |
-
}
|
| 1467 |
-
GGML_ASSERT(i_split < GGML_SCHED_MAX_SPLITS);
|
| 1468 |
-
split = &sched->splits[i_split];
|
| 1469 |
-
split->backend_id = node_backend_id;
|
| 1470 |
-
split->i_start = i;
|
| 1471 |
-
split->n_inputs = 0;
|
| 1472 |
-
cur_backend_id = node_backend_id;
|
| 1473 |
-
}
|
| 1474 |
-
|
| 1475 |
-
// find inputs that are not on the same backend
|
| 1476 |
-
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 1477 |
-
struct ggml_tensor * src = node->src[j];
|
| 1478 |
-
if (src == NULL) {
|
| 1479 |
-
continue;
|
| 1480 |
-
}
|
| 1481 |
-
|
| 1482 |
-
const int src_backend_id = tensor_backend_id(src);
|
| 1483 |
-
assert(src_backend_id != -1); // all inputs should be assigned by now
|
| 1484 |
-
|
| 1485 |
-
if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
|
| 1486 |
-
size_t id = hash_id(src);
|
| 1487 |
-
if (sched->tensor_copies[id][src_backend_id][0] == NULL) {
|
| 1488 |
-
ggml_backend_t backend = sched->backends[src_backend_id];
|
| 1489 |
-
for (int c = 0; c < sched->n_copies; c++) {
|
| 1490 |
-
struct ggml_tensor * tensor_copy;
|
| 1491 |
-
if (c == sched->cur_copy) {
|
| 1492 |
-
tensor_copy = src; // use the original tensor as the current copy
|
| 1493 |
-
} else {
|
| 1494 |
-
tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
|
| 1495 |
-
ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
|
| 1496 |
-
}
|
| 1497 |
-
if (sched->n_copies > 1) {
|
| 1498 |
-
ggml_set_input(tensor_copy);
|
| 1499 |
-
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
|
| 1500 |
-
}
|
| 1501 |
-
sched->tensor_copies[id][src_backend_id][c] = tensor_copy;
|
| 1502 |
-
SET_CAUSE(tensor_copy, "4.cpy");
|
| 1503 |
-
}
|
| 1504 |
-
int n_graph_inputs = sched->n_graph_inputs++;
|
| 1505 |
-
GGML_ASSERT(n_graph_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
|
| 1506 |
-
sched->graph_inputs[n_graph_inputs] = src;
|
| 1507 |
-
}
|
| 1508 |
-
}
|
| 1509 |
-
|
| 1510 |
-
if (src_backend_id != node_backend_id) {
|
| 1511 |
-
// create a copy of the input in the split's backend
|
| 1512 |
-
const size_t id = hash_id(src);
|
| 1513 |
-
if (sched->tensor_copies[id][cur_backend_id][0] == NULL) {
|
| 1514 |
-
ggml_backend_t backend = sched->backends[cur_backend_id];
|
| 1515 |
-
for (int c = 0; c < sched->n_copies; c++) {
|
| 1516 |
-
struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
|
| 1517 |
-
ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
|
| 1518 |
-
if (sched->n_copies > 1) {
|
| 1519 |
-
ggml_set_input(tensor_copy);
|
| 1520 |
-
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
|
| 1521 |
-
}
|
| 1522 |
-
sched->tensor_copies[id][cur_backend_id][c] = tensor_copy;
|
| 1523 |
-
SET_CAUSE(tensor_copy, "4.cpy");
|
| 1524 |
-
}
|
| 1525 |
-
int n_inputs = split->n_inputs++;
|
| 1526 |
-
GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
|
| 1527 |
-
split->inputs[n_inputs] = src;
|
| 1528 |
-
}
|
| 1529 |
-
node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy];
|
| 1530 |
-
}
|
| 1531 |
-
}
|
| 1532 |
-
}
|
| 1533 |
-
split->i_end = graph->n_nodes;
|
| 1534 |
-
sched->n_splits = i_split + 1;
|
| 1535 |
-
}
|
| 1536 |
-
#ifdef DEBUG_PASS4
|
| 1537 |
-
fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
|
| 1538 |
-
#endif
|
| 1539 |
-
|
| 1540 |
-
// create copies of the graph for each split
|
| 1541 |
-
// TODO: avoid this copy
|
| 1542 |
-
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2, false);
|
| 1543 |
-
for (int i = 0; i < sched->n_splits; i++) {
|
| 1544 |
-
struct ggml_backend_sched_split * split = &sched->splits[i];
|
| 1545 |
-
split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
|
| 1546 |
-
|
| 1547 |
-
// add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
|
| 1548 |
-
for (int j = 0; j < split->n_inputs; j++) {
|
| 1549 |
-
assert(graph_copy->size > (graph_copy->n_nodes + 1));
|
| 1550 |
-
|
| 1551 |
-
struct ggml_tensor * input = split->inputs[j];
|
| 1552 |
-
const size_t input_id = hash_id(input);
|
| 1553 |
-
struct ggml_tensor * input_cpy = sched->tensor_copies[input_id][split->backend_id][sched->cur_copy];
|
| 1554 |
-
|
| 1555 |
-
// add a dependency to the input source so that it is not freed before the copy is done
|
| 1556 |
-
struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input);
|
| 1557 |
-
input_dep->src[0] = input;
|
| 1558 |
-
sched->node_backend_ids[graph_copy->n_nodes] = sched->tensor_backend_id[input_id];
|
| 1559 |
-
graph_copy->nodes[graph_copy->n_nodes++] = input_dep;
|
| 1560 |
-
|
| 1561 |
-
// add a dependency to the input copy so that it is allocated at the start of the split
|
| 1562 |
-
sched->node_backend_ids[graph_copy->n_nodes] = split->backend_id;
|
| 1563 |
-
graph_copy->nodes[graph_copy->n_nodes++] = input_cpy;
|
| 1564 |
-
}
|
| 1565 |
-
|
| 1566 |
-
for (int j = split->i_start; j < split->i_end; j++) {
|
| 1567 |
-
assert(graph_copy->size > graph_copy->n_nodes);
|
| 1568 |
-
sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(graph->nodes[j]);
|
| 1569 |
-
graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j];
|
| 1570 |
-
}
|
| 1571 |
-
}
|
| 1572 |
-
|
| 1573 |
-
if (sched->n_copies > 1) {
|
| 1574 |
-
// add input copies as leafs so that they are allocated first
|
| 1575 |
-
for (int i = 0; i < sched->n_graph_inputs; i++) {
|
| 1576 |
-
struct ggml_tensor * input = sched->graph_inputs[i];
|
| 1577 |
-
size_t id = hash_id(input);
|
| 1578 |
-
int backend_id = tensor_backend_id(input);
|
| 1579 |
-
for (int c = 0; c < sched->n_copies; c++) {
|
| 1580 |
-
struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
|
| 1581 |
-
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
|
| 1582 |
-
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
|
| 1583 |
-
}
|
| 1584 |
-
}
|
| 1585 |
-
|
| 1586 |
-
for (int i = 0; i < sched->n_splits; i++) {
|
| 1587 |
-
struct ggml_backend_sched_split * split = &sched->splits[i];
|
| 1588 |
-
int backend_id = split->backend_id;
|
| 1589 |
-
for (int j = 0; j < split->n_inputs; j++) {
|
| 1590 |
-
struct ggml_tensor * input = split->inputs[j];
|
| 1591 |
-
size_t id = hash_id(input);
|
| 1592 |
-
for (int c = 0; c < sched->n_copies; c++) {
|
| 1593 |
-
struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
|
| 1594 |
-
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
|
| 1595 |
-
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
|
| 1596 |
-
}
|
| 1597 |
-
}
|
| 1598 |
-
}
|
| 1599 |
-
}
|
| 1600 |
-
|
| 1601 |
-
// add leafs from the original graph
|
| 1602 |
-
for (int i = 0; i < graph->n_leafs; i++) {
|
| 1603 |
-
struct ggml_tensor * leaf = graph->leafs[i];
|
| 1604 |
-
sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf);
|
| 1605 |
-
graph_copy->leafs[graph_copy->n_leafs++] = leaf;
|
| 1606 |
-
}
|
| 1607 |
-
|
| 1608 |
-
sched->graph = graph_copy;
|
| 1609 |
-
}
|
| 1610 |
-
|
| 1611 |
-
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
| 1612 |
-
// allocate graph
|
| 1613 |
-
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
|
| 1614 |
-
// the re-allocation may cause the split inputs to be moved to a different address
|
| 1615 |
-
ggml_backend_sched_synchronize(sched);
|
| 1616 |
-
#ifndef NDEBUG
|
| 1617 |
-
fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__);
|
| 1618 |
-
#endif
|
| 1619 |
-
ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
|
| 1620 |
-
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
|
| 1621 |
-
fprintf(stderr, "%s: failed to allocate graph\n", __func__);
|
| 1622 |
-
return false;
|
| 1623 |
-
}
|
| 1624 |
-
}
|
| 1625 |
-
|
| 1626 |
-
return true;
|
| 1627 |
-
}
|
| 1628 |
-
|
| 1629 |
-
static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
|
| 1630 |
-
struct ggml_backend_sched_split * splits = sched->splits;
|
| 1631 |
-
|
| 1632 |
-
for (int i = 0; i < sched->n_splits; i++) {
|
| 1633 |
-
struct ggml_backend_sched_split * split = &splits[i];
|
| 1634 |
-
int split_backend_id = split->backend_id;
|
| 1635 |
-
ggml_backend_t split_backend = sched->backends[split_backend_id];
|
| 1636 |
-
|
| 1637 |
-
// copy the input tensors to the split backend
|
| 1638 |
-
for (int j = 0; j < split->n_inputs; j++) {
|
| 1639 |
-
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
|
| 1640 |
-
struct ggml_tensor * input = split->inputs[j];
|
| 1641 |
-
struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy];
|
| 1642 |
-
|
| 1643 |
-
if (input->flags & GGML_TENSOR_FLAG_INPUT) {
|
| 1644 |
-
// inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done
|
| 1645 |
-
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
|
| 1646 |
-
ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
|
| 1647 |
-
} else {
|
| 1648 |
-
ggml_backend_synchronize(split_backend);
|
| 1649 |
-
}
|
| 1650 |
-
ggml_backend_tensor_copy(input, input_cpy);
|
| 1651 |
-
} else {
|
| 1652 |
-
// wait for the split backend to finish using the input before overwriting it
|
| 1653 |
-
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
|
| 1654 |
-
ggml_backend_event_wait(split_backend, sched->events[split_backend_id][sched->cur_copy]);
|
| 1655 |
-
} else {
|
| 1656 |
-
ggml_backend_synchronize(split_backend);
|
| 1657 |
-
}
|
| 1658 |
-
ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy);
|
| 1659 |
-
}
|
| 1660 |
-
}
|
| 1661 |
-
|
| 1662 |
-
if (!sched->callback_eval) {
|
| 1663 |
-
enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph);
|
| 1664 |
-
if (ec != GGML_STATUS_SUCCESS) {
|
| 1665 |
-
return ec;
|
| 1666 |
-
}
|
| 1667 |
-
} else {
|
| 1668 |
-
// similar to ggml_backend_compare_graph_backend
|
| 1669 |
-
for (int j0 = 0; j0 < split->graph.n_nodes; j0++) {
|
| 1670 |
-
struct ggml_tensor * t = split->graph.nodes[j0];
|
| 1671 |
-
|
| 1672 |
-
// check if the user needs data from this node
|
| 1673 |
-
bool need = sched->callback_eval(t, true, sched->callback_eval_user_data);
|
| 1674 |
-
|
| 1675 |
-
int j1 = j0;
|
| 1676 |
-
|
| 1677 |
-
// determine the range [j0, j1] of nodes that can be computed together
|
| 1678 |
-
while (!need && j1 < split->graph.n_nodes - 1) {
|
| 1679 |
-
t = split->graph.nodes[++j1];
|
| 1680 |
-
need = sched->callback_eval(t, true, sched->callback_eval_user_data);
|
| 1681 |
-
}
|
| 1682 |
-
|
| 1683 |
-
struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1);
|
| 1684 |
-
|
| 1685 |
-
enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &gv);
|
| 1686 |
-
if (ec != GGML_STATUS_SUCCESS) {
|
| 1687 |
-
return ec;
|
| 1688 |
-
}
|
| 1689 |
-
|
| 1690 |
-
// TODO: pass backend to the callback, then the user can decide if they want to synchronize
|
| 1691 |
-
ggml_backend_synchronize(split_backend);
|
| 1692 |
-
|
| 1693 |
-
if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) {
|
| 1694 |
-
break;
|
| 1695 |
-
}
|
| 1696 |
-
|
| 1697 |
-
j0 = j1;
|
| 1698 |
-
}
|
| 1699 |
-
}
|
| 1700 |
-
|
| 1701 |
-
// record the event of this copy
|
| 1702 |
-
if (split->n_inputs > 0) {
|
| 1703 |
-
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
|
| 1704 |
-
ggml_backend_event_record(sched->events[split_backend_id][sched->cur_copy]);
|
| 1705 |
-
}
|
| 1706 |
-
}
|
| 1707 |
-
}
|
| 1708 |
-
|
| 1709 |
-
sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
|
| 1710 |
-
|
| 1711 |
-
return GGML_STATUS_SUCCESS;
|
| 1712 |
-
}
|
| 1713 |
-
|
| 1714 |
-
ggml_backend_sched_t ggml_backend_sched_new(
|
| 1715 |
-
ggml_backend_t * backends,
|
| 1716 |
-
ggml_backend_buffer_type_t * bufts,
|
| 1717 |
-
int n_backends,
|
| 1718 |
-
size_t graph_size,
|
| 1719 |
-
bool parallel) {
|
| 1720 |
-
GGML_ASSERT(n_backends > 0);
|
| 1721 |
-
GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
|
| 1722 |
-
GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
|
| 1723 |
-
|
| 1724 |
-
struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1);
|
| 1725 |
-
|
| 1726 |
-
// initialize hash table
|
| 1727 |
-
sched->hash_set = ggml_hash_set_new(graph_size);
|
| 1728 |
-
sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size);
|
| 1729 |
-
sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size);
|
| 1730 |
-
|
| 1731 |
-
const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
|
| 1732 |
-
sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), nodes_size);
|
| 1733 |
-
sched->leaf_backend_ids = calloc(sizeof(sched->leaf_backend_ids[0]), nodes_size);
|
| 1734 |
-
|
| 1735 |
-
sched->n_backends = n_backends;
|
| 1736 |
-
|
| 1737 |
-
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
|
| 1738 |
-
|
| 1739 |
-
const int initial_splits_capacity = 16;
|
| 1740 |
-
sched->splits = calloc(sizeof(sched->splits[0]), initial_splits_capacity);
|
| 1741 |
-
sched->splits_capacity = initial_splits_capacity;
|
| 1742 |
-
|
| 1743 |
-
for (int b = 0; b < n_backends; b++) {
|
| 1744 |
-
sched->backends[b] = backends[b];
|
| 1745 |
-
sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]);
|
| 1746 |
-
GGML_ASSERT(ggml_backend_buft_supports_backend(sched->bufts[b], backends[b]));
|
| 1747 |
-
if (sched->n_copies > 1) {
|
| 1748 |
-
for (int c = 0; c < sched->n_copies; c++) {
|
| 1749 |
-
sched->events[b][c] = ggml_backend_event_new(backends[b]);
|
| 1750 |
-
}
|
| 1751 |
-
}
|
| 1752 |
-
}
|
| 1753 |
-
|
| 1754 |
-
sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
|
| 1755 |
-
|
| 1756 |
-
ggml_backend_sched_reset(sched);
|
| 1757 |
-
|
| 1758 |
-
return sched;
|
| 1759 |
-
}
|
| 1760 |
-
|
| 1761 |
-
void ggml_backend_sched_free(ggml_backend_sched_t sched) {
|
| 1762 |
-
if (sched == NULL) {
|
| 1763 |
-
return;
|
| 1764 |
-
}
|
| 1765 |
-
for (int b = 0; b < sched->n_backends; b++) {
|
| 1766 |
-
for (int c = 0; c < sched->n_copies; c++) {
|
| 1767 |
-
ggml_backend_event_free(sched->events[b][c]);
|
| 1768 |
-
}
|
| 1769 |
-
}
|
| 1770 |
-
ggml_gallocr_free(sched->galloc);
|
| 1771 |
-
ggml_free(sched->ctx);
|
| 1772 |
-
free(sched->splits);
|
| 1773 |
-
free(sched->hash_set.keys);
|
| 1774 |
-
free(sched->tensor_backend_id);
|
| 1775 |
-
free(sched->tensor_copies);
|
| 1776 |
-
free(sched->node_backend_ids);
|
| 1777 |
-
free(sched->leaf_backend_ids);
|
| 1778 |
-
free(sched);
|
| 1779 |
-
}
|
| 1780 |
-
|
| 1781 |
-
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
|
| 1782 |
-
// reset state for the next run
|
| 1783 |
-
size_t hash_size = sched->hash_set.size;
|
| 1784 |
-
memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
|
| 1785 |
-
memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
|
| 1786 |
-
memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
|
| 1787 |
-
|
| 1788 |
-
sched->is_reset = true;
|
| 1789 |
-
sched->is_alloc = false;
|
| 1790 |
-
}
|
| 1791 |
-
|
| 1792 |
-
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
|
| 1793 |
-
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes);
|
| 1794 |
-
|
| 1795 |
-
ggml_backend_sched_split_graph(sched, measure_graph);
|
| 1796 |
-
|
| 1797 |
-
// TODO: extract this to a separate function
|
| 1798 |
-
if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
|
| 1799 |
-
return false;
|
| 1800 |
-
}
|
| 1801 |
-
|
| 1802 |
-
ggml_backend_sched_reset(sched);
|
| 1803 |
-
ggml_backend_sched_synchronize(sched);
|
| 1804 |
-
|
| 1805 |
-
return true;
|
| 1806 |
-
}
|
| 1807 |
-
|
| 1808 |
-
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
| 1809 |
-
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes);
|
| 1810 |
-
|
| 1811 |
-
ggml_backend_sched_split_graph(sched, graph);
|
| 1812 |
-
|
| 1813 |
-
if (!ggml_backend_sched_alloc_splits(sched)) {
|
| 1814 |
-
return false;
|
| 1815 |
-
}
|
| 1816 |
-
|
| 1817 |
-
sched->is_alloc = true;
|
| 1818 |
-
|
| 1819 |
-
return true;
|
| 1820 |
-
}
|
| 1821 |
-
|
| 1822 |
-
enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
| 1823 |
-
enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph);
|
| 1824 |
-
ggml_backend_sched_synchronize(sched);
|
| 1825 |
-
return err;
|
| 1826 |
-
}
|
| 1827 |
-
|
| 1828 |
-
enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
| 1829 |
-
if (!sched->is_reset && !sched->is_alloc) {
|
| 1830 |
-
ggml_backend_sched_reset(sched);
|
| 1831 |
-
}
|
| 1832 |
-
|
| 1833 |
-
if (!sched->is_alloc) {
|
| 1834 |
-
if (!ggml_backend_sched_alloc_graph(sched, graph)) {
|
| 1835 |
-
return GGML_STATUS_ALLOC_FAILED;
|
| 1836 |
-
}
|
| 1837 |
-
}
|
| 1838 |
-
|
| 1839 |
-
return ggml_backend_sched_compute_splits(sched);
|
| 1840 |
-
}
|
| 1841 |
-
|
| 1842 |
-
void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
|
| 1843 |
-
for (int i = 0; i < sched->n_backends; i++) {
|
| 1844 |
-
ggml_backend_synchronize(sched->backends[i]);
|
| 1845 |
-
}
|
| 1846 |
-
}
|
| 1847 |
-
|
| 1848 |
-
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
|
| 1849 |
-
sched->callback_eval = callback;
|
| 1850 |
-
sched->callback_eval_user_data = user_data;
|
| 1851 |
-
}
|
| 1852 |
-
|
| 1853 |
-
int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) {
|
| 1854 |
-
return sched->n_splits;
|
| 1855 |
-
}
|
| 1856 |
-
|
| 1857 |
-
int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
|
| 1858 |
-
return sched->n_copies;
|
| 1859 |
-
}
|
| 1860 |
-
|
| 1861 |
-
size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
| 1862 |
-
int backend_index = ggml_backend_sched_backend_id(sched, backend);
|
| 1863 |
-
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
|
| 1864 |
-
|
| 1865 |
-
return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
|
| 1866 |
-
}
|
| 1867 |
-
|
| 1868 |
-
void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
|
| 1869 |
-
int backend_index = ggml_backend_sched_backend_id(sched, backend);
|
| 1870 |
-
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
|
| 1871 |
-
tensor_backend_id(node) = backend_index;
|
| 1872 |
-
}
|
| 1873 |
-
|
| 1874 |
-
ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
|
| 1875 |
-
int backend_index = tensor_backend_id(node);
|
| 1876 |
-
if (backend_index == -1) {
|
| 1877 |
-
return NULL;
|
| 1878 |
-
}
|
| 1879 |
-
return sched->backends[backend_index];
|
| 1880 |
-
}
|
| 1881 |
-
|
| 1882 |
-
// utils
|
| 1883 |
-
|
| 1884 |
-
void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
| 1885 |
-
GGML_ASSERT(tensor->buffer == NULL);
|
| 1886 |
-
GGML_ASSERT(tensor->view_src != NULL);
|
| 1887 |
-
GGML_ASSERT(tensor->view_src->buffer != NULL);
|
| 1888 |
-
GGML_ASSERT(tensor->view_src->data != NULL);
|
| 1889 |
-
|
| 1890 |
-
tensor->buffer = buffer;
|
| 1891 |
-
tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
|
| 1892 |
-
tensor->backend = tensor->view_src->backend;
|
| 1893 |
-
ggml_backend_buffer_init_tensor(buffer, tensor);
|
| 1894 |
-
}
|
| 1895 |
-
|
| 1896 |
-
void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
|
| 1897 |
-
GGML_ASSERT(tensor->buffer == NULL);
|
| 1898 |
-
GGML_ASSERT(tensor->data == NULL);
|
| 1899 |
-
GGML_ASSERT(tensor->view_src == NULL);
|
| 1900 |
-
GGML_ASSERT(addr >= ggml_backend_buffer_get_base(buffer));
|
| 1901 |
-
GGML_ASSERT((char *)addr + ggml_backend_buffer_get_alloc_size(buffer, tensor) <=
|
| 1902 |
-
(char *)ggml_backend_buffer_get_base(buffer) + ggml_backend_buffer_get_size(buffer));
|
| 1903 |
-
|
| 1904 |
-
tensor->buffer = buffer;
|
| 1905 |
-
tensor->data = addr;
|
| 1906 |
-
ggml_backend_buffer_init_tensor(buffer, tensor);
|
| 1907 |
-
}
|
| 1908 |
-
|
| 1909 |
-
static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
|
| 1910 |
-
struct ggml_context * ctx_allocated, struct ggml_context * ctx_unallocated, struct ggml_tensor * src) {
|
| 1911 |
-
|
| 1912 |
-
GGML_ASSERT(src != NULL);
|
| 1913 |
-
GGML_ASSERT(src->data && "graph must be allocated");
|
| 1914 |
-
|
| 1915 |
-
size_t id = ggml_hash_insert(hash_set, src);
|
| 1916 |
-
if (id == GGML_HASHTABLE_ALREADY_EXISTS) {
|
| 1917 |
-
return node_copies[ggml_hash_find(hash_set, src)];
|
| 1918 |
-
}
|
| 1919 |
-
|
| 1920 |
-
struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src);
|
| 1921 |
-
if (src->view_src != NULL) {
|
| 1922 |
-
dst->view_src = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, src->view_src);
|
| 1923 |
-
dst->view_offs = src->view_offs;
|
| 1924 |
-
}
|
| 1925 |
-
dst->op = src->op;
|
| 1926 |
-
memcpy(dst->op_params, src->op_params, sizeof(dst->op_params));
|
| 1927 |
-
ggml_set_name(dst, src->name);
|
| 1928 |
-
|
| 1929 |
-
// copy src
|
| 1930 |
-
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
| 1931 |
-
struct ggml_tensor * s = src->src[i];
|
| 1932 |
-
if (s == NULL) {
|
| 1933 |
-
continue;
|
| 1934 |
-
}
|
| 1935 |
-
dst->src[i] = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, s);
|
| 1936 |
-
}
|
| 1937 |
-
|
| 1938 |
-
node_copies[id] = dst;
|
| 1939 |
-
return dst;
|
| 1940 |
-
}
|
| 1941 |
-
|
| 1942 |
-
static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
|
| 1943 |
-
size_t id = ggml_hash_find(hash_set, src);
|
| 1944 |
-
if (node_init[id]) {
|
| 1945 |
-
return;
|
| 1946 |
-
}
|
| 1947 |
-
node_init[id] = true;
|
| 1948 |
-
|
| 1949 |
-
struct ggml_tensor * dst = node_copies[id];
|
| 1950 |
-
if (dst->view_src != NULL) {
|
| 1951 |
-
graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
|
| 1952 |
-
ggml_backend_view_init(dst->view_src->buffer, dst);
|
| 1953 |
-
}
|
| 1954 |
-
else {
|
| 1955 |
-
ggml_backend_tensor_copy(src, dst);
|
| 1956 |
-
}
|
| 1957 |
-
|
| 1958 |
-
// init src
|
| 1959 |
-
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
| 1960 |
-
struct ggml_tensor * s = src->src[i];
|
| 1961 |
-
if (s == NULL) {
|
| 1962 |
-
continue;
|
| 1963 |
-
}
|
| 1964 |
-
graph_copy_init_tensor(hash_set, node_copies, node_init, s);
|
| 1965 |
-
}
|
| 1966 |
-
}
|
| 1967 |
-
|
| 1968 |
-
struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
|
| 1969 |
-
struct ggml_hash_set hash_set = {
|
| 1970 |
-
/* .size = */ graph->visited_hash_table.size,
|
| 1971 |
-
/* .keys = */ calloc(sizeof(hash_set.keys[0]), graph->visited_hash_table.size) // NOLINT
|
| 1972 |
-
};
|
| 1973 |
-
struct ggml_tensor ** node_copies = calloc(sizeof(node_copies[0]), hash_set.size); // NOLINT
|
| 1974 |
-
bool * node_init = calloc(sizeof(node_init[0]), hash_set.size);
|
| 1975 |
-
|
| 1976 |
-
struct ggml_init_params params = {
|
| 1977 |
-
/* .mem_size = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false),
|
| 1978 |
-
/* .mem_buffer = */ NULL,
|
| 1979 |
-
/* .no_alloc = */ true
|
| 1980 |
-
};
|
| 1981 |
-
|
| 1982 |
-
struct ggml_context * ctx_allocated = ggml_init(params);
|
| 1983 |
-
struct ggml_context * ctx_unallocated = ggml_init(params);
|
| 1984 |
-
|
| 1985 |
-
if (ctx_allocated == NULL || ctx_unallocated == NULL) {
|
| 1986 |
-
fprintf(stderr, "failed to allocate context for graph copy\n");
|
| 1987 |
-
free(hash_set.keys);
|
| 1988 |
-
free(node_copies);
|
| 1989 |
-
free(node_init);
|
| 1990 |
-
ggml_free(ctx_allocated);
|
| 1991 |
-
ggml_free(ctx_unallocated);
|
| 1992 |
-
return (struct ggml_backend_graph_copy) {
|
| 1993 |
-
/* .buffer = */ NULL,
|
| 1994 |
-
/* .ctx_allocated = */ NULL,
|
| 1995 |
-
/* .ctx_unallocated = */ NULL,
|
| 1996 |
-
/* .graph = */ NULL,
|
| 1997 |
-
};
|
| 1998 |
-
}
|
| 1999 |
-
|
| 2000 |
-
// dup nodes
|
| 2001 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 2002 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 2003 |
-
graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, node);
|
| 2004 |
-
}
|
| 2005 |
-
|
| 2006 |
-
// allocate nodes
|
| 2007 |
-
ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
|
| 2008 |
-
if (buffer == NULL) {
|
| 2009 |
-
fprintf(stderr, "failed to allocate buffer for graph copy\n");
|
| 2010 |
-
free(hash_set.keys);
|
| 2011 |
-
free(node_copies);
|
| 2012 |
-
free(node_init);
|
| 2013 |
-
ggml_free(ctx_allocated);
|
| 2014 |
-
ggml_free(ctx_unallocated);
|
| 2015 |
-
return (struct ggml_backend_graph_copy) {
|
| 2016 |
-
/* .buffer = */ NULL,
|
| 2017 |
-
/* .ctx_allocated = */ NULL,
|
| 2018 |
-
/* .ctx_unallocated = */ NULL,
|
| 2019 |
-
/* .graph = */ NULL,
|
| 2020 |
-
};
|
| 2021 |
-
}
|
| 2022 |
-
|
| 2023 |
-
//printf("copy buffer size: %zu MB\n", ggml_backend_buffer_get_size(buffer) / 1024 / 1024);
|
| 2024 |
-
|
| 2025 |
-
// copy data and init views
|
| 2026 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 2027 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 2028 |
-
graph_copy_init_tensor(hash_set, node_copies, node_init, node);
|
| 2029 |
-
}
|
| 2030 |
-
|
| 2031 |
-
// build graph copy
|
| 2032 |
-
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false);
|
| 2033 |
-
for (int i = 0; i < graph->n_nodes; i++) {
|
| 2034 |
-
struct ggml_tensor * node = graph->nodes[i];
|
| 2035 |
-
struct ggml_tensor * node_copy = node_copies[ggml_hash_find(hash_set, node)];
|
| 2036 |
-
graph_copy->nodes[i] = node_copy;
|
| 2037 |
-
}
|
| 2038 |
-
graph_copy->n_nodes = graph->n_nodes;
|
| 2039 |
-
|
| 2040 |
-
free(hash_set.keys);
|
| 2041 |
-
free(node_copies);
|
| 2042 |
-
free(node_init);
|
| 2043 |
-
|
| 2044 |
-
return (struct ggml_backend_graph_copy) {
|
| 2045 |
-
/* .buffer = */ buffer,
|
| 2046 |
-
/* .ctx_allocated = */ ctx_allocated,
|
| 2047 |
-
/* .ctx_unallocated = */ ctx_unallocated,
|
| 2048 |
-
/* .graph = */ graph_copy,
|
| 2049 |
-
};
|
| 2050 |
-
}
|
| 2051 |
-
|
| 2052 |
-
void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
|
| 2053 |
-
ggml_backend_buffer_free(copy.buffer);
|
| 2054 |
-
ggml_free(copy.ctx_allocated);
|
| 2055 |
-
ggml_free(copy.ctx_unallocated);
|
| 2056 |
-
}
|
| 2057 |
-
|
| 2058 |
-
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data) {
|
| 2059 |
-
struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
|
| 2060 |
-
if (copy.buffer == NULL) {
|
| 2061 |
-
return false;
|
| 2062 |
-
}
|
| 2063 |
-
|
| 2064 |
-
struct ggml_cgraph * g1 = graph;
|
| 2065 |
-
struct ggml_cgraph * g2 = copy.graph;
|
| 2066 |
-
|
| 2067 |
-
assert(g1->n_nodes == g2->n_nodes);
|
| 2068 |
-
|
| 2069 |
-
for (int i = 0; i < g1->n_nodes; i++) {
|
| 2070 |
-
//printf("eval %d/%d\n", i, g1->n_nodes);
|
| 2071 |
-
struct ggml_tensor * t1 = g1->nodes[i];
|
| 2072 |
-
struct ggml_tensor * t2 = g2->nodes[i];
|
| 2073 |
-
|
| 2074 |
-
assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
|
| 2075 |
-
|
| 2076 |
-
struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
|
| 2077 |
-
struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
|
| 2078 |
-
|
| 2079 |
-
ggml_backend_graph_compute(backend1, &g1v);
|
| 2080 |
-
ggml_backend_graph_compute(backend2, &g2v);
|
| 2081 |
-
|
| 2082 |
-
if (ggml_is_view_op(t1->op)) {
|
| 2083 |
-
continue;
|
| 2084 |
-
}
|
| 2085 |
-
|
| 2086 |
-
// compare results, calculate rms etc
|
| 2087 |
-
if (!callback(i, t1, t2, user_data)) {
|
| 2088 |
-
break;
|
| 2089 |
-
}
|
| 2090 |
-
}
|
| 2091 |
-
|
| 2092 |
-
ggml_backend_graph_copy_free(copy);
|
| 2093 |
-
|
| 2094 |
-
return true;
|
| 2095 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bindings/ruby/ext/ggml-backend.h
DELETED
@@ -1,233 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-#include "ggml-alloc.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
-typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
-typedef struct ggml_backend_event * ggml_backend_event_t;
-typedef struct ggml_backend * ggml_backend_t;
-typedef void * ggml_backend_graph_plan_t;
-
-//
-// Backend buffer
-//
-
-// buffer type
-GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
-GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
-GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
-GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
-GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
-GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
-GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
-
-// buffer
-enum ggml_backend_buffer_usage {
-    GGML_BACKEND_BUFFER_USAGE_ANY = 0,
-    GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
-};
-
-GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
-GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
-GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
-GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
-GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
-GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
-GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
-GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
-GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
-GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
-GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
-GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
-GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
-
-//
-// Backend
-//
-
-GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
-GGML_API const char * ggml_backend_name(ggml_backend_t backend);
-GGML_API void ggml_backend_free(ggml_backend_t backend);
-
-GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
-GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
-GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
-GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
-
-GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
-
-GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
-
-GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
-
-GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
-GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-
-GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
-GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
-GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
-GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
-
-// tensor copy between different backends
-GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
-
-// asynchronous copy
-// the copy is performed after all the currently queued operations in backend_src
-// backend_dst will wait for the copy to complete before performing other operations
-// automatic fallback to sync copy if async is not supported
-GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
-
-// events
-GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
-GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
-GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
-GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
-GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event); // wait async on event
-
-//
-// CPU backend
-//
-
-GGML_API ggml_backend_t ggml_backend_cpu_init(void);
-
-GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
-GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
-GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
-
-// Create a backend buffer from an existing pointer
-GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
-
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
-
-#ifdef GGML_USE_CPU_HBM
-GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
-#endif
-
-//
-// Backend registry
-//
-
-// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
-
-GGML_API size_t ggml_backend_reg_get_count(void);
-GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
-GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
-GGML_API const char * ggml_backend_reg_get_name(size_t i);
-GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
-GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
-GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
-
-//
-// Backend scheduler
-//
-
-// The backend scheduler allows for multiple backends to be used together
-// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
-// The backends are selected based on:
-// - the backend that supports the operation
-// - the location of the pre-allocated tensors (e.g. the weights)
-/*
-  Example usage:
-
-    // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
-    // preferrably to run on the same backend as the buffer
-    ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
-
-    sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
-
-    // initialize buffers from a max size graph (optional)
-    reserve_graph = build_graph(sched, max_batch_size);
-
-    // manually assign nodes to a backend (optional, should not be needed in most cases)
-    struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
-    ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
-
-    ggml_backend_sched_reserve(sched, reserve_graph);
-
-    // compute
-    graph = build_graph(sched);
-    ggml_backend_sched_graph_compute(sched, graph);
-
-    // if there are graph inputs:
-    ggml_backend_sched_reset(sched);
-    ggml_backend_sched_alloc_graph(sched, graph);
-    ggml_backend_tensor_set(input_tensor, ...);
-    ggml_backend_sched_graph_compute(sched, graph);
-  }
-*/
-
-struct ggml_backend_sched;
-typedef struct ggml_backend_sched * ggml_backend_sched_t;
-
-// when ask == true, the scheduler wants to know if the user wants to observe this node
-// this allows the scheduler to batch nodes together in order to evaluate them in a single call
-//
-// when ask == false, the scheduler is passing the node tensor to the user for observation
-// if the user returns false, the scheduler will cancel the graph compute
-//
-typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
-
-// Initialize a backend scheduler
-GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
-GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
-
-// Initialize backend buffers from a measure graph
-GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
-
-// Get the number of splits of the last graph
-GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
-GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
-
-GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
-
-GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
-GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
-
-// Allocate and compute graph on the backend scheduler
-GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
-GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
-GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
-GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
-
-// Reset all assignments and allocators - must be called before changing the node backends
-GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
-
-// Set a callback to be called for each resulting node during graph compute
-GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
-
-//
-// Utils
-//
-
-struct ggml_backend_graph_copy {
-    ggml_backend_buffer_t buffer;
-    struct ggml_context * ctx_allocated;
-    struct ggml_context * ctx_unallocated;
-    struct ggml_cgraph * graph;
-};
-
-// Copy a graph to a different backend
-GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
-GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
-
-typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
-
-// Compare the output of two backends
-GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
-
-// Tensor initialization
-GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
-GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
-
-
-#ifdef __cplusplus
-}
-#endif
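As orientation for reviewers, the deleted header above is a copy of the ggml backend API. The following is a minimal, hypothetical sketch (not part of this change) of how its CPU-backend and buffer declarations are typically combined with the ggml.h context helpers; ggml_init, ggml_new_tensor_1d, ggml_nbytes and GGML_TYPE_F32 are assumed from the companion ggml.h.

#include "ggml.h"
#include "ggml-backend.h"
#include <stdio.h>

int main(void) {
    // create a CPU backend and a small tensor, then round-trip some data through it
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_cpu_set_n_threads(backend, 4);

    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true, // tensor data will live in a backend buffer, not the context
    };
    struct ggml_context * ctx = ggml_init(params);
    struct ggml_tensor  * t   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    // allocate a backend buffer large enough for the tensor and place the tensor in it
    size_t size = ggml_nbytes(t) + ggml_backend_get_alignment(backend);
    ggml_backend_buffer_t buf = ggml_backend_alloc_buffer(backend, size);
    ggml_backend_tensor_alloc(buf, t, ggml_backend_buffer_get_base(buf));

    float src[8] = {0, 1, 2, 3, 4, 5, 6, 7}, dst[8];
    ggml_backend_tensor_set(t, src, 0, sizeof(src));
    ggml_backend_tensor_get(t, dst, 0, sizeof(dst));
    printf("dst[7] = %f\n", dst[7]);

    ggml_backend_buffer_free(buf);
    ggml_free(ctx);
    ggml_backend_free(backend);
    return 0;
}

The sketch assumes it is compiled against the same ggml revision these headers were copied from; after this change the Ruby extension builds against the upstream ggml sources instead of this local copy.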
bindings/ruby/ext/ggml-common.h
DELETED
The diff for this file is too large to render.
See raw diff
bindings/ruby/ext/ggml-cuda.h
DELETED
@@ -1,43 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-#ifdef GGML_USE_HIPBLAS
-#define GGML_CUDA_NAME "ROCm"
-#define GGML_CUBLAS_NAME "hipBLAS"
-#else
-#define GGML_CUDA_NAME "CUDA"
-#define GGML_CUBLAS_NAME "cuBLAS"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define GGML_CUDA_MAX_DEVICES 16
-
-// backend API
-GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
-
-GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
-
-// device buffer
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
-
-// split tensor buffer that splits matrices by rows across multiple devices
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
-
-// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
-
-GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
-GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
-GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
-
-GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
-GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
-
-#ifdef __cplusplus
-}
-#endif
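A hedged sketch of how the device-query functions declared above are typically used, assuming a build where the CUDA (or ROCm) backend is actually compiled in; it is illustrative only and not part of the change.

#include "ggml-cuda.h"   // also pulls in ggml-backend.h for ggml_backend_free()
#include <stdio.h>

int main(void) {
    int n = ggml_backend_cuda_get_device_count();
    for (int i = 0; i < n; ++i) {
        char desc[128];
        size_t free_mem = 0, total_mem = 0;
        ggml_backend_cuda_get_device_description(i, desc, sizeof(desc));
        ggml_backend_cuda_get_device_memory(i, &free_mem, &total_mem);
        printf("%s device %d: %s (%zu / %zu MiB free)\n",
               GGML_CUDA_NAME, i, desc, free_mem >> 20, total_mem >> 20);
    }
    if (n > 0) {
        ggml_backend_t backend = ggml_backend_cuda_init(0); // first device
        if (backend) {
            ggml_backend_free(backend);
        }
    }
    return 0;
}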
bindings/ruby/ext/ggml-impl.h
DELETED
@@ -1,272 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-
-// GGML internal header
-
-#include <assert.h>
-#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
-#include <stddef.h>
-#include <stdbool.h>
-#include <string.h> // memcpy
-#include <math.h> // fabsf
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// static_assert should be a #define, but if it's not,
-// fall back to the _Static_assert C11 keyword.
-// if C99 - static_assert is noop
-// ref: https://stackoverflow.com/a/53923785/4039976
-#ifndef __cplusplus
-#ifndef static_assert
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
-#define static_assert(cond, msg) _Static_assert(cond, msg)
-#else
-#define static_assert(cond, msg) struct global_scope_noop_trick
-#endif
-#endif
-#endif
-
-// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
-#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
-#ifndef __FMA__
-#define __FMA__
-#endif
-#ifndef __F16C__
-#define __F16C__
-#endif
-#endif
-
-// __SSE3__ and __SSSE3__ are not defined in MSVC, but SSE3/SSSE3 are present when AVX/AVX2/AVX512 are available
-#if defined(_MSC_VER) && (defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__))
-#ifndef __SSE3__
-#define __SSE3__
-#endif
-#ifndef __SSSE3__
-#define __SSSE3__
-#endif
-#endif
-
-// 16-bit float
-// on Arm, we use __fp16
-// on x86, we use uint16_t
-#if defined(__ARM_NEON) && !defined(_MSC_VER)
-
-// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
-//
-//   $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
-//
-#include <arm_neon.h>
-
-typedef __fp16 ggml_fp16_internal_t;
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-
-#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-
-static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
-    ggml_fp16_internal_t tmp;
-    memcpy(&tmp, &h, sizeof(ggml_fp16_t));
-    return (float)tmp;
-}
-
-static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
-    ggml_fp16_t res;
-    ggml_fp16_internal_t tmp = f;
-    memcpy(&res, &tmp, sizeof(ggml_fp16_t));
-    return res;
-}
-
-#else
-
-typedef uint16_t ggml_fp16_internal_t;
-
-#ifdef __wasm_simd128__
-#include <wasm_simd128.h>
-#else
-#ifdef __POWER9_VECTOR__
-#include <altivec.h>
-#undef bool
-#define bool _Bool
-#else
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#include <intrin.h>
-#else
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
-#if !defined(__riscv)
-#include <immintrin.h>
-#endif
-#endif
-#endif
-#endif
-#endif
-
-#ifdef __riscv_v_intrinsic
-#include <riscv_vector.h>
-#endif
-
-#ifdef __F16C__
-
-#ifdef _MSC_VER
-#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
-#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
-#else
-#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
-#endif
-
-#elif defined(__POWER9_VECTOR__)
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-/* the inline asm below is about 12% faster than the lookup method */
-#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
-#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
-
-static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
-    register float f;
-    register double d;
-    __asm__(
-        "mtfprd %0,%2\n"
-        "xscvhpdp %0,%0\n"
-        "frsp %1,%0\n" :
-        /* temp */ "=d"(d),
-        /* out */ "=f"(f):
-        /* in */ "r"(h));
-    return f;
-}
-
-static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
-    register double d;
-    register ggml_fp16_t r;
-    __asm__( /* xscvdphp can work on double or single precision */
-        "xscvdphp %0,%2\n"
-        "mffprd %1,%0\n" :
-        /* temp */ "=d"(d),
-        /* out */ "=r"(r):
-        /* in */ "f"(f));
-    return r;
-}
-
-#else
-
-// FP16 <-> FP32
-// ref: https://github.com/Maratyszcza/FP16
-
-static inline float fp32_from_bits(uint32_t w) {
-    union {
-        uint32_t as_bits;
-        float as_value;
-    } fp32;
-    fp32.as_bits = w;
-    return fp32.as_value;
-}
-
-static inline uint32_t fp32_to_bits(float f) {
-    union {
-        float as_value;
-        uint32_t as_bits;
-    } fp32;
-    fp32.as_value = f;
-    return fp32.as_bits;
-}
-
-static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
-    const uint32_t w = (uint32_t) h << 16;
-    const uint32_t sign = w & UINT32_C(0x80000000);
-    const uint32_t two_w = w + w;
-
-    const uint32_t exp_offset = UINT32_C(0xE0) << 23;
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
-    const float exp_scale = 0x1.0p-112f;
-#else
-    const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
-#endif
-    const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
-
-    const uint32_t magic_mask = UINT32_C(126) << 23;
-    const float magic_bias = 0.5f;
-    const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
-
-    const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
-    const uint32_t result = sign |
-        (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
-    return fp32_from_bits(result);
-}
-
-static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
-    const float scale_to_inf = 0x1.0p+112f;
-    const float scale_to_zero = 0x1.0p-110f;
-#else
-    const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
-    const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
-#endif
-    float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
-
-    const uint32_t w = fp32_to_bits(f);
-    const uint32_t shl1_w = w + w;
-    const uint32_t sign = w & UINT32_C(0x80000000);
-    uint32_t bias = shl1_w & UINT32_C(0xFF000000);
-    if (bias < UINT32_C(0x71000000)) {
-        bias = UINT32_C(0x71000000);
-    }
-
-    base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
-    const uint32_t bits = fp32_to_bits(base);
-    const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
-    const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
-    const uint32_t nonsign = exp_bits + mantissa_bits;
-    return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
-}
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-
-#endif // __F16C__
-
-#endif // __ARM_NEON
-
-// precomputed f32 table for f16 (256 KB)
-// defined in ggml.c, initialized in ggml_init()
-extern float ggml_table_f32_f16[1 << 16];
-
-// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
-// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
-// This is also true for POWER9.
-#if !defined(GGML_FP16_TO_FP32)
-inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
-    uint16_t s;
-    memcpy(&s, &f, sizeof(uint16_t));
-    return ggml_table_f32_f16[s];
-}
-
-#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
-#endif
-
-#if !defined(GGML_FP32_TO_FP16)
-#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
-#endif
-
-#define GGML_HASHTABLE_FULL ((size_t)-1)
-#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
-
-struct ggml_hash_set ggml_hash_set_new(size_t size);
-
-bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
-
-// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
-size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
-
-// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
-size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
-
-// return index, asserts if table is full
-size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
-
-#ifdef __cplusplus
-}
-#endif
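The deleted header above is an internal one, so the FP16 helpers it defines are not a supported public API; still, as a minimal sketch (an assumption-laden illustration, not part of this change) of what those conversion macros do, the arithmetic GGML_COMPUTE_* forms can be exercised directly. The table-based GGML_FP16_TO_FP32 path would additionally need ggml_init() to populate ggml_table_f32_f16 first.

#include "ggml.h"       // defines ggml_fp16_t
#include "ggml-impl.h"  // internal header: selects a conversion path per platform
#include <stdio.h>

int main(void) {
    float x = 3.14159265f;
    // round-trip a value through half precision using the compute-only macros
    ggml_fp16_t h = GGML_COMPUTE_FP32_TO_FP16(x);
    float y = GGML_COMPUTE_FP16_TO_FP32(h);
    printf("fp32 %.7f -> fp16 -> fp32 %.7f\n", x, y);
    return 0;
}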
bindings/ruby/ext/ggml-kompute.h
DELETED
@@ -1,46 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct ggml_vk_device {
-    int index;
-    int type; // same as VkPhysicalDeviceType
-    size_t heapSize;
-    const char * name;
-    const char * vendor;
-    int subgroupSize;
-    uint64_t bufferAlignment;
-    uint64_t maxAlloc;
-};
-
-struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
-bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
-bool ggml_vk_has_vulkan(void);
-bool ggml_vk_has_device(void);
-struct ggml_vk_device ggml_vk_current_device(void);
-
-//
-// backend API
-//
-
-// forward declaration
-typedef struct ggml_backend * ggml_backend_t;
-
-GGML_API ggml_backend_t ggml_backend_kompute_init(int device);
-
-GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
-
-GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
-
-#ifdef __cplusplus
-}
-#endif
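A hypothetical sketch of the device-enumeration flow declared above, for builds with the Kompute (Vulkan) backend enabled; the 512 MiB figure is illustrative, and the free() of the returned array is an assumption about how the helper allocates it.

#include "ggml-kompute.h"   // also brings in ggml-backend.h for ggml_backend_free()
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    size_t count = 0;
    // ask for devices that can satisfy roughly 512 MiB of allocations (illustrative figure)
    struct ggml_vk_device * devices = ggml_vk_available_devices((size_t)512 * 1024 * 1024, &count);
    for (size_t i = 0; i < count; ++i) {
        printf("device %d: %s (%s), heap %zu MiB, subgroup %d\n",
               devices[i].index, devices[i].name, devices[i].vendor,
               (size_t)(devices[i].heapSize >> 20), devices[i].subgroupSize);
    }
    free(devices); // assumption: the returned array is heap-allocated by the helper

    if (count > 0) {
        ggml_backend_t backend = ggml_backend_kompute_init(0);
        if (backend) {
            ggml_backend_free(backend);
        }
    }
    return 0;
}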
bindings/ruby/ext/ggml-metal.h
DELETED
@@ -1,66 +0,0 @@
-// An interface allowing to compute ggml_cgraph with Metal
-//
-// This is a fully functional interface that extends ggml with GPU support for Apple devices.
-// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, OpenCL, etc.)
-//
-// How it works?
-//
-// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
-// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
-// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
-//
-// You only need to make sure that all memory buffers that you used during the graph creation
-// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
-// used during the graph evaluation to determine the arguments of the compute kernels.
-//
-// Synchronization between device and host memory (for example for input and output tensors)
-// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
-//
-
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-#include <stddef.h>
-#include <stdbool.h>
-
-// max memory buffers that can be mapped to the device
-#define GGML_METAL_MAX_BUFFERS 64
-
-struct ggml_tensor;
-struct ggml_cgraph;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//
-// backend API
-// user-code should use only these functions
-//
-
-GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
-
-GGML_API ggml_backend_t ggml_backend_metal_init(void);
-
-GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
-
-GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
-
-GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
-
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
-
-// helper to check if the device supports a specific family
-// ideally, the user code should be doing these checks
-// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
-GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
-
-// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
-GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
-
-#ifdef __cplusplus
-}
-#endif
-
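A short, hedged sketch of the Metal entry points declared above (only meaningful on Apple builds with the Metal backend compiled in); the family index 7 is an illustrative value for the Apple7 GPU family, per Apple's feature-set tables referenced in the header.

#include "ggml-metal.h"   // includes ggml-backend.h, which provides ggml_backend_free()
#include <stdio.h>

int main(void) {
    ggml_backend_t backend = ggml_backend_metal_init();
    if (backend == NULL) {
        fprintf(stderr, "Metal backend not available\n");
        return 1;
    }
    ggml_backend_metal_set_n_cb(backend, 1); // one command buffer is enough for a demo
    printf("is_metal=%d, supports Apple7 family=%d\n",
           ggml_backend_is_metal(backend),
           ggml_backend_metal_supports_family(backend, 7)); // 7 = illustrative family index
    ggml_backend_free(backend);
    return 0;
}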
bindings/ruby/ext/ggml-opencl.h
DELETED
@@ -1,36 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-GGML_API void ggml_cl_init(void);
-
-GGML_API void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-GGML_API void ggml_cl_add(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
-GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-GGML_API void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
-
-// GGML_API void * ggml_cl_host_malloc(size_t size);
-// GGML_API void ggml_cl_host_free(void * ptr);
-
-GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);
-
-GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
-
-// backend API
-
-// GGML_API ggml_backend_t ggml_backend_opencl_init(void);
-
-// GGML_API bool ggml_backend_is_opencl(ggml_backend_t backend);
-
-GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
-// GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
-
-#ifdef __cplusplus
-}
-#endif
bindings/ruby/ext/ggml-quants.c
DELETED
The diff for this file is too large to render.
See raw diff
bindings/ruby/ext/ggml-quants.h
DELETED
|
@@ -1,133 +0,0 @@
|
|
| 1 |
-
#pragma once
|
| 2 |
-
|
| 3 |
-
#define GGML_COMMON_DECL_C
|
| 4 |
-
#include "ggml-common.h"
|
| 5 |
-
|
| 6 |
-
#include "ggml.h"
|
| 7 |
-
|
| 8 |
-
// GGML internal header
|
| 9 |
-
|
| 10 |
-
#ifdef __cplusplus
|
| 11 |
-
extern "C" {
|
| 12 |
-
#endif
|
| 13 |
-
|
| 14 |
-
// Quantization
|
| 15 |
-
void quantize_row_q4_0_reference(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k);
|
| 16 |
-
void quantize_row_q4_1_reference(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k);
|
| 17 |
-
void quantize_row_q5_0_reference(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k);
|
| 18 |
-
void quantize_row_q5_1_reference(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k);
|
| 19 |
-
void quantize_row_q8_0_reference(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k);
|
| 20 |
-
void quantize_row_q8_1_reference(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k);
|
| 21 |
-
|
| 22 |
-
void quantize_row_q2_K_reference(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k);
|
| 23 |
-
void quantize_row_q3_K_reference(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k);
|
| 24 |
-
void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k);
|
| 25 |
-
void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k);
|
| 26 |
-
void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k);
|
| 27 |
-
void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k);
|
| 28 |
-
|
| 29 |
-
void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k);
|
| 30 |
-
void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k);
|
| 31 |
-
void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k);
|
| 32 |
-
void quantize_row_iq3_s_reference (const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k);
|
| 33 |
-
void quantize_row_iq2_s_reference (const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k);
|
| 34 |
-
|
| 35 |
-
void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 36 |
-
void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 37 |
-
void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 38 |
-
void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 39 |
-
void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 40 |
-
void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 41 |
-
|
| 42 |
-
void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 43 |
-
void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 44 |
-
void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 45 |
-
void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 46 |
-
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 47 |
-
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 48 |
-
|
| 49 |
-
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 50 |
-
void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 51 |
-
void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 52 |
-
void quantize_row_iq3_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 53 |
-
void quantize_row_iq2_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
| 54 |
-
|
| 55 |
-
// Dequantization
|
| 56 |
-
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 57 |
-
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 58 |
-
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 59 |
-
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 60 |
-
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 61 |
-
//void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 62 |
-
|
| 63 |
-
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 64 |
-
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 65 |
-
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 66 |
-
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 67 |
-
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 68 |
-
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 69 |
-
|
| 70 |
-
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 71 |
-
void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 72 |
-
void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 73 |
-
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 74 |
-
void dequantize_row_iq1_s (const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 75 |
-
void dequantize_row_iq1_m (const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 76 |
-
void dequantize_row_iq4_nl (const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 77 |
-
void dequantize_row_iq4_xs (const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 78 |
-
void dequantize_row_iq3_s (const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
| 79 |
-
|
| 80 |
-
// Dot product
|
| 81 |
-
-void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-
-void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-
-void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_iq4_nl_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
-
-// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
-size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_iq2_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_iq1_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_iq1_m (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_iq4_nl (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_iq4_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_iq3_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-
-size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-
-void iq2xs_init_impl(enum ggml_type type);
-void iq2xs_free_impl(enum ggml_type type);
-void iq3xs_init_impl(int grid_size);
-void iq3xs_free_impl(int grid_size);
-
-#ifdef __cplusplus
-}
-#endif
-
bindings/ruby/ext/ggml-sycl.h
DELETED
@@ -1,49 +0,0 @@
-//
-// MIT license
-// Copyright (C) 2024 Intel Corporation
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define GGML_SYCL_MAX_DEVICES 48
-#define GGML_SYCL_NAME "SYCL"
-
-// backend API
-GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
-
-// devide buffer
-GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
-
-// split tensor buffer that splits matrices by rows across multiple devices
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
-
-// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
-GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
-
-GGML_API void ggml_backend_sycl_print_sycl_devices(void);
-GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
-GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
-GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
-
-// TODO: these are temporary
-// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
-GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
-GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
-
-// SYCL doesn't support registering host memory, keep here for reference
-// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
-// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
-#ifdef __cplusplus
-}
-#endif
bindings/ruby/ext/ggml-vulkan.h
DELETED
@@ -1,29 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define GGML_VK_NAME "Vulkan"
-#define GGML_VK_MAX_DEVICES 16
-
-GGML_API void ggml_vk_instance_init(void);
-
-// backend API
-GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
-
-GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
-GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void);
-GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
-GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
-
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
-// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
-
-#ifdef __cplusplus
-}
-#endif
bindings/ruby/extsources.yaml
ADDED
@@ -0,0 +1,32 @@
+---
+../../src:
+- ext/whisper.cpp
+../../include:
+- ext/whisper.h
+../../ggml/src:
+- ext/ggml.c
+- ext/ggml-impl.h
+- ext/ggml-aarch64.h
+- ext/ggml-aarch64.c
+- ext/ggml-alloc.c
+- ext/ggml-backend-impl.h
+- ext/ggml-backend.cpp
+- ext/ggml-common.h
+- ext/ggml-quants.h
+- ext/ggml-quants.c
+- ext/ggml-cpu-impl.h
+../../ggml/include:
+- ext/ggml.h
+- ext/ggml-alloc.h
+- ext/ggml-backend.h
+- ext/ggml-cuda.h
+- ext/ggml-kompute.h
+- ext/ggml-metal.h
+- ext/ggml-sycl.h
+- ext/ggml-vulkan.h
+../../examples:
+- ext/dr_wav.h
+../..:
+- README.md
+- LICENSE
+
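Note: extsources.yaml maps each upstream directory to the paths its files take inside the gem (mostly under ext/). As a rough, hypothetical sketch of how such a manifest can be consumed in Ruby -- this is illustrative only and not necessarily what the PR's Rakefile actually does:

# Hypothetical sketch: copy each upstream source listed in extsources.yaml
# into its destination inside bindings/ruby (e.g. ../../ggml/src/ggml.c -> ext/ggml.c).
require "yaml"
require "fileutils"

YAML.load_file("extsources.yaml").each do |src_dir, dest_paths|
  dest_paths.each do |dest|
    src = File.join(src_dir, File.basename(dest))
    FileUtils.cp(src, dest) if File.exist?(src)
  end
end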
bindings/ruby/tests/test_whisper.rb
CHANGED
@@ -1,11 +1,10 @@
 TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
-EXTDIR = File.join(TOPDIR, 'ext')
-#$LIBDIR = File.join(TOPDIR, 'lib')
-#$:.unshift(LIBDIR)
-$:.unshift(EXTDIR)
 
 require 'whisper'
 require 'test/unit'
+require 'tempfile'
+require 'tmpdir'
+require 'shellwords'
 
 class TestWhisper < Test::Unit::TestCase
   def setup
@@ -128,4 +127,25 @@ class TestWhisper < Test::Unit::TestCase
     }
   end
 
+  def test_build
+    Tempfile.create do |file|
+      assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
+      assert_path_exist file.to_path
+    end
+  end
+
+  sub_test_case "Building binary on installation" do
+    def setup
+      system "rake", "build", exception: true
+    end
+
+    def test_install
+      filename = `rake -Tbuild`.match(/(whispercpp-(?:.+)\.gem)/)[1]
+      basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
+      Dir.mktmpdir do |dir|
+        system "gem", "install", "--install-dir", dir.shellescape, "pkg/#{filename.shellescape}", exception: true
+        assert_path_exist File.join(dir, "gems/whispercpp-1.3.0/lib", basename)
+      end
+    end
+  end
 end
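Note: the new install test builds the gem, installs it into a temporary directory, and checks that the compiled library landed under lib/. The expected file name comes from RbConfig::CONFIG["DLEXT"], which keeps the assertion platform-independent. A small illustration; the printed values are typical, not guaranteed for every Ruby build:

# DLEXT is the suffix Ruby uses for compiled extensions on the current platform.
require "rbconfig"

basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
puts basename   # typically "whisper.so" on Linux, "whisper.bundle" on macOS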
bindings/ruby/whispercpp.gemspec
CHANGED
@@ -1,3 +1,5 @@
+require "yaml"
+
 Gem::Specification.new do |s|
   s.name = "whispercpp"
   s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
@@ -7,10 +9,8 @@ Gem::Specification.new do |s|
   s.email = '[email protected]'
   s.extra_rdoc_files = ['LICENSE', 'README.md']
 
-  s.files =
-
-  #### Load-time details
-  s.require_paths = ['lib','ext']
+  s.files = `git ls-files . -z`.split("\x0") + YAML.load_file("extsources.yaml").values.flatten
 
   s.summary = %q{Ruby whisper.cpp bindings}
   s.test_files = ["tests/test_whisper.rb"]
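Note: the new s.files expression combines the git-tracked files of the bindings directory with the destination paths listed in extsources.yaml, so the copied-in C/C++ sources are packaged even though they are not tracked inside bindings/ruby. A rough sketch of what it evaluates to, assuming the YAML shown above (paths are examples, not an exhaustive list):

# Illustrative only: what the gemspec's file list resolves to.
require "yaml"

tracked  = `git ls-files . -z`.split("\x0")                  # files git tracks under bindings/ruby
imported = YAML.load_file("extsources.yaml").values.flatten  # e.g. ["ext/whisper.cpp", "ext/ggml.c", ..., "LICENSE"]

files = tracked + imported   # both groups end up in the built gem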
|