Upload 2 files

Files changed:
- GPT2.exe +1 -1
- main-ctx.cpp +1 -2
GPT2.exe CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7277090f60fd35553ee09bb0a110c36efb344614b2deec8441d21813b57c78b4
 size 21223233
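Note: GPT2.exe is tracked with Git LFS (the `version https://git-lfs.github.com/spec/v1` header marks an LFS pointer file), so the diff above compares pointer files rather than the binary itself; the new pointer records the uploaded binary's SHA-256 and size.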
main-ctx.cpp CHANGED
@@ -20,7 +20,6 @@
 // default hparams (GPT-2 774M)
 struct gpt_hparams {
     int32_t n_vocab = 50257; // Vocabulary size remains the same
-    //int32_t n_ctx = 1024; // Maximum context length (sequence length)
     int32_t n_embd = 1024; // Embedding dimensionality
     int32_t n_head = 16; // Number of attention heads
     int32_t n_layer = 24; // Number of transformer layers
@@ -32,7 +31,7 @@ struct gpt_hparams {
     int32_t n_predict = 200; // new tokens to predict
     int32_t n_parallel = 1; // number of parallel streams
     int32_t n_batch = 32; // batch size for prompt processing
-    int32_t n_ctx =
+    int32_t n_ctx = 1024; // context size (this is the KV cache max size)
     int32_t n_gpu_layers = 0; // number of layers to offlload to the GPU
 
     bool ignore_eos = false; // ignore EOS token when generating text
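For context on the relocated n_ctx field: it caps how many past tokens the model retains keys and values for, so it directly bounds the KV cache allocation. Below is a minimal sketch, not from this commit, of how such a field typically translates into a memory bound; kv_cache_bytes() is a hypothetical helper, and fp16 (2 bytes per element) storage is an assumption.

// Sketch (assumptions labeled): how n_ctx bounds KV cache memory in a
// GPT-2 style inference setup. Fields mirror the hparams in the diff.
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct gpt_hparams {
    int32_t n_vocab = 50257; // vocabulary size
    int32_t n_embd  = 1024;  // embedding dimensionality
    int32_t n_head  = 16;    // number of attention heads
    int32_t n_layer = 24;    // number of transformer layers
    int32_t n_ctx   = 1024;  // context size (KV cache max size)
};

// Assumption: one K and one V tensor per layer, each n_ctx x n_embd,
// stored as 16-bit floats (2 bytes per element).
static size_t kv_cache_bytes(const gpt_hparams& h) {
    return 2ull * h.n_layer * h.n_ctx * h.n_embd * sizeof(uint16_t);
}

int main() {
    gpt_hparams h;
    std::printf("KV cache upper bound: %.1f MiB\n",
                kv_cache_bytes(h) / (1024.0 * 1024.0));
    return 0;
}

With the GPT-2 774M defaults above (24 layers, 1024-dim embeddings, 1024-token context), the bound comes out to 96 MiB, which is why n_ctx is the natural knob for trading context length against memory.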