It looks like there is an incorrect limit on the model's context length. The fp16 model, like the original one, has a context length of 131072. Updating this value resolved errors when processing longer prompts.
#2
by
dtrawins
- opened
- config.json +1 -1
config.json
CHANGED
@@ -20,7 +20,7 @@
|
|
20 |
"intermediate_size": 8192,
|
21 |
"interpolate_factor": 1,
|
22 |
"lm_head_bias": false,
|
23 |
-
"max_position_embeddings":
|
24 |
"mlp_bias": false,
|
25 |
"model_type": "phi3",
|
26 |
"num_attention_heads": 24,
|
|
|
20 |
"intermediate_size": 8192,
|
21 |
"interpolate_factor": 1,
|
22 |
"lm_head_bias": false,
|
23 |
+
"max_position_embeddings": 131072,
|
24 |
"mlp_bias": false,
|
25 |
"model_type": "phi3",
|
26 |
"num_attention_heads": 24,
|