ggerganov committed on
Commit
c1c9908
·
1 Parent(s): 43cbdf7

files : remove old sources (part 2)

Browse files
ggml/src/ggml-cpu/cpu-feats-x86.cpp DELETED
@@ -1,327 +0,0 @@
1
- #include "ggml-backend-impl.h"
2
-
3
- #if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
4
-
5
- #ifdef _MSC_VER
6
- #include <intrin.h>
7
- #endif
8
-
9
- #include <cstring>
10
- #include <vector>
11
- #include <bitset>
12
- #include <array>
13
- #include <string>
14
-
15
- // ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf
16
// Snapshot of the x86 CPUID feature bits of the CPU executing the constructor.
//
// The constructor queries leaf 0x0 (max basic leaf + vendor string), leaf 0x1,
// leaf 0x7 (sub-leaves 0 and 1) and the extended leaves 0x80000001..0x80000004
// once, and caches the raw registers in bitsets. Every accessor below only
// tests one cached bit (some are additionally gated on the detected vendor).
//
// NOTE: these are raw CPUID bits; whether the OS has enabled the matching
// register state is not checked here.
struct cpuid_x86 {
    // --- CPUID leaf 0x1, ECX ---
    bool SSE3(void) const       { return f_1_ecx[0]; }
    bool PCLMULQDQ(void) const  { return f_1_ecx[1]; }
    bool MONITOR(void) const    { return f_1_ecx[3]; }
    bool SSSE3(void) const      { return f_1_ecx[9]; }
    bool FMA(void) const        { return f_1_ecx[12]; }
    bool CMPXCHG16B(void) const { return f_1_ecx[13]; }
    bool SSE41(void) const      { return f_1_ecx[19]; }
    bool SSE42(void) const      { return f_1_ecx[20]; }
    bool MOVBE(void) const      { return f_1_ecx[22]; }
    bool POPCNT(void) const     { return f_1_ecx[23]; }
    bool AES(void) const        { return f_1_ecx[25]; }
    bool XSAVE(void) const      { return f_1_ecx[26]; }
    bool OSXSAVE(void) const    { return f_1_ecx[27]; }
    bool AVX(void) const        { return f_1_ecx[28]; }
    bool F16C(void) const       { return f_1_ecx[29]; }
    bool RDRAND(void) const     { return f_1_ecx[30]; }

    // --- CPUID leaf 0x1, EDX ---
    bool MSR(void) const   { return f_1_edx[5]; }
    bool CX8(void) const   { return f_1_edx[8]; }
    bool SEP(void) const   { return f_1_edx[11]; }
    bool CMOV(void) const  { return f_1_edx[15]; }
    bool CLFSH(void) const { return f_1_edx[19]; }
    bool MMX(void) const   { return f_1_edx[23]; }
    bool FXSR(void) const  { return f_1_edx[24]; }
    bool SSE(void) const   { return f_1_edx[25]; }
    bool SSE2(void) const  { return f_1_edx[26]; }

    // --- CPUID leaf 0x7 sub-leaf 0, EBX ---
    bool FSGSBASE(void) const { return f_7_ebx[0]; }
    bool BMI1(void) const     { return f_7_ebx[3]; }
    bool HLE(void) const      { return is_intel && f_7_ebx[4]; }
    bool AVX2(void) const     { return f_7_ebx[5]; }
    bool BMI2(void) const     { return f_7_ebx[8]; }
    bool ERMS(void) const     { return f_7_ebx[9]; }
    bool INVPCID(void) const  { return f_7_ebx[10]; }
    bool RTM(void) const      { return is_intel && f_7_ebx[11]; }
    bool AVX512F(void) const  { return f_7_ebx[16]; }
    bool AVX512DQ(void) const { return f_7_ebx[17]; }
    bool RDSEED(void) const   { return f_7_ebx[18]; }
    bool ADX(void) const      { return f_7_ebx[19]; }
    bool AVX512PF(void) const { return f_7_ebx[26]; }
    bool AVX512ER(void) const { return f_7_ebx[27]; }
    bool AVX512CD(void) const { return f_7_ebx[28]; }
    bool AVX512BW(void) const { return f_7_ebx[30]; }
    bool AVX512VL(void) const { return f_7_ebx[31]; }

    bool SHA(void) const { return f_7_ebx[29]; }

    // --- CPUID leaf 0x7 sub-leaf 0, ECX ---
    bool PREFETCHWT1(void) const { return f_7_ecx[0]; }

    // --- CPUID leaf 0x80000001, ECX ---
    // LZCNT() and ABM() read the same bit; they differ only in the vendor gate.
    bool LAHF(void) const  { return f_81_ecx[0]; }
    bool LZCNT(void) const { return is_intel && f_81_ecx[5]; }
    bool ABM(void) const   { return is_amd && f_81_ecx[5]; }
    bool SSE4a(void) const { return is_amd && f_81_ecx[6]; }
    bool XOP(void) const   { return is_amd && f_81_ecx[11]; }
    bool TBM(void) const   { return is_amd && f_81_ecx[21]; }

    // --- CPUID leaf 0x80000001, EDX ---
    bool SYSCALL(void) const   { return is_intel && f_81_edx[11]; }
    bool MMXEXT(void) const    { return is_amd && f_81_edx[22]; }
    bool RDTSCP(void) const    { return is_intel && f_81_edx[27]; }
    bool _3DNOWEXT(void) const { return is_amd && f_81_edx[30]; }
    bool _3DNOW(void) const    { return is_amd && f_81_edx[31]; }

    // --- CPUID leaf 0x7: sub-leaf 0 ECX/EDX and sub-leaf 1 EAX ---
    bool AVX512_VBMI(void) const { return f_7_ecx[1]; }
    bool AVX512_VNNI(void) const { return f_7_ecx[11]; }
    bool AVX512_FP16(void) const { return f_7_edx[23]; }
    bool AVX512_BF16(void) const { return f_7_1_eax[5]; }
    bool AVX_VNNI(void) const    { return f_7_1_eax[4]; }

    bool AMX_TILE(void) const { return f_7_edx[24]; }
    bool AMX_INT8(void) const { return f_7_edx[25]; }
    bool AMX_FP16(void) const { return f_7_1_eax[21]; }
    bool AMX_BF16(void) const { return f_7_edx[22]; }

    // Thin wrappers around the CPUID instruction.
    // cpu_info receives EAX, EBX, ECX, EDX (in that order).
#ifdef _MSC_VER
    static void cpuid(int cpu_info[4], int eax) {
        __cpuid(cpu_info, eax);
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __cpuidex(cpu_info, eax, ecx);
    }
#else
    static void cpuid(int cpu_info[4], int eax) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(0));
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(ecx));
    }
#endif

    cpuid_x86() {
        std::array<int, 4> regs = {};

        // Leaf 0x0: EAX = highest valid basic leaf, EBX/EDX/ECX = vendor string.
        cpuid(regs.data(), 0);
        const int n_ids = regs[0];

        // Assemble the 12-byte vendor string in EBX, EDX, ECX order.
        // memcpy avoids writing ints through a char buffer (aliasing/alignment).
        char vendor_buf[13] = {};
        std::memcpy(vendor_buf + 0, &regs[1], sizeof(int));
        std::memcpy(vendor_buf + 4, &regs[3], sizeof(int));
        std::memcpy(vendor_buf + 8, &regs[2], sizeof(int));
        vendor = vendor_buf;
        if (vendor == "GenuineIntel") {
            is_intel = true;
        } else if (vendor == "AuthenticAMD") {
            is_amd = true;
        }

        // Leaf 0x1: baseline feature flags.
        if (n_ids >= 1) {
            cpuidex(regs.data(), 1, 0);
            f_1_ecx = to_bits(regs[2]);
            f_1_edx = to_bits(regs[3]);
        }

        // Leaf 0x7: structured extended feature flags.
        if (n_ids >= 7) {
            cpuidex(regs.data(), 7, 0);
            // Sub-leaf 0 returns the highest supported sub-leaf index in EAX.
            const unsigned int max_subleaf = static_cast<unsigned int>(regs[0]);
            f_7_ebx = to_bits(regs[1]);
            f_7_ecx = to_bits(regs[2]);
            f_7_edx = to_bits(regs[3]);

            // Only read sub-leaf 1 when the CPU says it exists.
            if (max_subleaf >= 1) {
                cpuidex(regs.data(), 7, 1);
                f_7_1_eax = to_bits(regs[0]);
            }
        }

        // Leaf 0x80000000: EAX = highest valid extended leaf.
        cpuid(regs.data(), static_cast<int>(0x80000000u));
        const unsigned int n_ex_ids = static_cast<unsigned int>(regs[0]);

        // Leaf 0x80000001: extended feature flags.
        if (n_ex_ids >= 0x80000001u) {
            cpuidex(regs.data(), static_cast<int>(0x80000001u), 0);
            f_81_ecx = to_bits(regs[2]);
            f_81_edx = to_bits(regs[3]);
        }

        // Leaves 0x80000002..0x80000004: 48-byte brand string, 16 bytes per leaf.
        if (n_ex_ids >= 0x80000004u) {
            char brand_buf[0x40] = {};
            for (unsigned int k = 0; k < 3; ++k) {
                cpuidex(regs.data(), static_cast<int>(0x80000002u + k), 0);
                std::memcpy(brand_buf + 16 * k, regs.data(), regs.size() * sizeof(int));
            }
            brand = brand_buf;
        }
    }

    bool is_intel = false;  // vendor string is "GenuineIntel"
    bool is_amd   = false;  // vendor string is "AuthenticAMD"
    std::string vendor;     // vendor string from leaf 0x0
    std::string brand;      // brand string; empty when the CPU does not report it
    std::bitset<32> f_1_ecx;
    std::bitset<32> f_1_edx;
    std::bitset<32> f_7_ebx;
    std::bitset<32> f_7_ecx;
    std::bitset<32> f_7_edx;
    std::bitset<32> f_7_1_eax;
    std::bitset<32> f_81_ecx;
    std::bitset<32> f_81_edx;

  private:
    // Reinterpret a raw register value as 32 independent flag bits.
    static std::bitset<32> to_bits(int reg) {
        return std::bitset<32>(static_cast<unsigned int>(reg));
    }
};
193
-
194
- #if 0
195
- void test_x86_is() {
196
- cpuid_x86 is;
197
- printf("CPU Vendor: %s\n", is.vendor.c_str());
198
- printf("Brand: %s\n", is.brand.c_str());
199
- printf("is_intel: %d\n", is.is_intel);
200
- printf("is_amd: %d\n", is.is_amd);
201
- printf("sse3: %d\n", is.SSE3());
202
- printf("pclmulqdq: %d\n", is.PCLMULQDQ());
203
- printf("ssse3: %d\n", is.SSSE3());
204
- printf("fma: %d\n", is.FMA());
205
- printf("cmpxchg16b: %d\n", is.CMPXCHG16B());
206
- printf("sse41: %d\n", is.SSE41());
207
- printf("sse42: %d\n", is.SSE42());
208
- printf("movbe: %d\n", is.MOVBE());
209
- printf("popcnt: %d\n", is.POPCNT());
210
- printf("aes: %d\n", is.AES());
211
- printf("xsave: %d\n", is.XSAVE());
212
- printf("osxsave: %d\n", is.OSXSAVE());
213
- printf("avx: %d\n", is.AVX());
214
- printf("f16c: %d\n", is.F16C());
215
- printf("rdrand: %d\n", is.RDRAND());
216
- printf("msr: %d\n", is.MSR());
217
- printf("cx8: %d\n", is.CX8());
218
- printf("sep: %d\n", is.SEP());
219
- printf("cmov: %d\n", is.CMOV());
220
- printf("clflush: %d\n", is.CLFSH());
221
- printf("mmx: %d\n", is.MMX());
222
- printf("fxsr: %d\n", is.FXSR());
223
- printf("sse: %d\n", is.SSE());
224
- printf("sse2: %d\n", is.SSE2());
225
- printf("fsgsbase: %d\n", is.FSGSBASE());
226
- printf("bmi1: %d\n", is.BMI1());
227
- printf("hle: %d\n", is.HLE());
228
- printf("avx2: %d\n", is.AVX2());
229
- printf("bmi2: %d\n", is.BMI2());
230
- printf("erms: %d\n", is.ERMS());
231
- printf("invpcid: %d\n", is.INVPCID());
232
- printf("rtm: %d\n", is.RTM());
233
- printf("avx512f: %d\n", is.AVX512F());
234
- printf("rdseed: %d\n", is.RDSEED());
235
- printf("adx: %d\n", is.ADX());
236
- printf("avx512pf: %d\n", is.AVX512PF());
237
- printf("avx512er: %d\n", is.AVX512ER());
238
- printf("avx512cd: %d\n", is.AVX512CD());
239
- printf("sha: %d\n", is.SHA());
240
- printf("prefetchwt1: %d\n", is.PREFETCHWT1());
241
- printf("lahf: %d\n", is.LAHF());
242
- printf("lzcnt: %d\n", is.LZCNT());
243
- printf("abm: %d\n", is.ABM());
244
- printf("sse4a: %d\n", is.SSE4a());
245
- printf("xop: %d\n", is.XOP());
246
- printf("tbm: %d\n", is.TBM());
247
- printf("syscall: %d\n", is.SYSCALL());
248
- printf("mmxext: %d\n", is.MMXEXT());
249
- printf("rdtscp: %d\n", is.RDTSCP());
250
- printf("3dnowext: %d\n", is._3DNOWEXT());
251
- printf("3dnow: %d\n", is._3DNOW());
252
- printf("avx512_vbmi: %d\n", is.AVX512_VBMI());
253
- printf("avx512_vnni: %d\n", is.AVX512_VNNI());
254
- printf("avx512_fp16: %d\n", is.AVX512_FP16());
255
- printf("avx512_bf16: %d\n", is.AVX512_BF16());
256
- printf("amx_tile: %d\n", is.AMX_TILE());
257
- printf("amx_int8: %d\n", is.AMX_INT8());
258
- printf("amx_fp16: %d\n", is.AMX_FP16());
259
- printf("amx_bf16: %d\n", is.AMX_BF16());
260
- }
261
- #endif
262
-
263
- static int ggml_backend_cpu_x86_score() {
264
- // FIXME: this does not check for OS support
265
-
266
- int score = 1;
267
- cpuid_x86 is;
268
-
269
- #ifdef GGML_FMA
270
- if (!is.FMA()) { return 0; }
271
- score += 1;
272
- #endif
273
- #ifdef GGML_F16C
274
- if (!is.F16C()) { return 0; }
275
- score += 1<<1;
276
- #endif
277
- #ifdef GGML_SSE42
278
- if (!is.SSE42()) { return 0; }
279
- score += 1<<2;
280
- #endif
281
- #ifdef GGML_BMI2
282
- if (!is.BMI2()) { return 0; }
283
- score += 1<<3;
284
- #endif
285
- #ifdef GGML_AVX
286
- if (!is.AVX()) { return 0; }
287
- score += 1<<4;
288
- #endif
289
- #ifdef GGML_AVX2
290
- if (!is.AVX2()) { return 0; }
291
- score += 1<<5;
292
- #endif
293
- #ifdef GGML_AVX_VNNI
294
- if (!is.AVX_VNNI()) { return 0; }
295
- score += 1<<6;
296
- #endif
297
- #ifdef GGML_AVX512
298
- if (!is.AVX512F()) { return 0; }
299
- if (!is.AVX512CD()) { return 0; }
300
- if (!is.AVX512VL()) { return 0; }
301
- if (!is.AVX512DQ()) { return 0; }
302
- if (!is.AVX512BW()) { return 0; }
303
- score += 1<<7;
304
- #endif
305
- #ifdef GGML_AVX512_VBMI
306
- if (!is.AVX512_VBMI()) { return 0; }
307
- score += 1<<8;
308
- #endif
309
- #ifdef GGML_AVX512_BF16
310
- if (!is.AVX512_BF16()) { return 0; }
311
- score += 1<<9;
312
- #endif
313
- #ifdef GGML_AVX512_VNNI
314
- if (!is.AVX512_VNNI()) { return 0; }
315
- score += 1<<10;
316
- #endif
317
- #ifdef GGML_AMX_INT8
318
- if (!is.AMX_INT8()) { return 0; }
319
- score += 1<<11;
320
- #endif
321
-
322
- return score;
323
- }
324
-
325
- GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score)
326
-
327
- #endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ggml/src/ggml-cpu/ggml-cpu-hbm.cpp DELETED
@@ -1,55 +0,0 @@
1
- #ifdef GGML_USE_CPU_HBM
2
-
3
- #include "ggml-backend.h"
4
- #include "ggml-backend-impl.h"
5
- #include "ggml-cpu.h"
6
- #include "ggml-impl.h"
7
-
8
- #include "ggml-cpu-hbm.h"
9
-
10
- // buffer type HBM
11
-
12
- #include <hbwmalloc.h>
13
-
14
- static const char * ggml_backend_cpu_hbm_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
15
- return "CPU_HBM";
16
-
17
- GGML_UNUSED(buft);
18
- }
19
-
20
- static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) {
21
- hbw_free(buffer->context);
22
- }
23
-
24
- static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
25
- size_t size) {
26
- void * ptr;
27
- int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size);
28
- if (result != 0) {
29
- GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size);
30
- return NULL;
31
- }
32
-
33
- ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
34
- buffer->buft = buft;
35
- buffer->iface.free_buffer = ggml_backend_cpu_hbm_buffer_free_buffer;
36
-
37
- return buffer;
38
- }
39
-
40
- ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
41
- static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
42
- /* .iface = */ {
43
- /* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
44
- /* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
45
- /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
46
- /* .get_max_size = */ nullptr, // defaults to SIZE_MAX
47
- /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
48
- /* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
49
- },
50
- /* .context = */ nullptr,
51
- };
52
-
53
- return &ggml_backend_cpu_buffer_type_hbm;
54
- }
55
- #endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ggml/src/ggml-cpu/ggml-cpu-hbm.h DELETED
@@ -1,8 +0,0 @@
1
- #pragma once
2
-
3
- #include "ggml-backend.h"
4
- #include "ggml.h"
5
-
6
- // GGML CPU internal header
7
-
8
- ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
 
 
 
 
 
 
 
 
 
ggml/src/ggml-cpu/ggml-cpu-traits.cpp DELETED
@@ -1,36 +0,0 @@
1
- #include "ggml-cpu-traits.h"
2
-
3
- #include "ggml-backend-impl.h"
4
- #include "ggml-backend.h"
5
-
6
- namespace ggml::cpu {
7
- tensor_traits::~tensor_traits() {}
8
-
9
- extra_buffer_type::~extra_buffer_type() {}
10
- } // namespace ggml::cpu
11
-
12
- bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
13
- for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
14
- if (extra && extra->context) {
15
- auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
16
- auto tensor_traits = buf_extra->get_tensor_traits(op);
17
- if (tensor_traits && tensor_traits->compute_forward(params, op)) {
18
- return true;
19
- }
20
- }
21
- }
22
- return false;
23
- }
24
-
25
- bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size) {
26
- for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
27
- if (extra && extra->context) {
28
- auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
29
- auto tensor_traits = buf_extra->get_tensor_traits(op);
30
- if (tensor_traits && tensor_traits->work_size(n_threads, op, *size)) {
31
- return true;
32
- }
33
- }
34
- }
35
- return false;
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ggml/src/ggml-cpu/ggml-cpu-traits.h DELETED
@@ -1,38 +0,0 @@
1
- #pragma once
2
- #include "ggml-backend-impl.h"
3
- #include "ggml-cpu-impl.h"
4
- #include "ggml.h"
5
-
6
- #ifdef __cplusplus
7
- # include <vector>
8
- extern "C" {
9
- #endif
10
-
11
- // return true if op part of extra "accelerator"
12
- bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
13
- bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
14
-
15
- #ifdef __cplusplus
16
- }
17
-
18
- namespace ggml::cpu {
19
- // register in tensor->extra
20
- class tensor_traits {
21
- public:
22
- virtual ~tensor_traits();
23
- virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
24
- virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
25
- };
26
-
27
- class extra_buffer_type {
28
- public:
29
- virtual ~extra_buffer_type();
30
- virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
31
- virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
32
- };
33
- } // namespace ggml::cpu
34
-
35
- // implemented in ggml-cpu.cpp.
36
- std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
37
-
38
- #endif