It looks like there is an incorrect limit on the model's context length. The fp16 variant, like the original model, has a 131072-token context length. Updating this value resolved errors when processing longer prompts.
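A minimal sketch of how the limit surfaces, assuming a hypothetical repo id `your-org/phi3-fp16` (not the actual repository this PR targets):

```python
from transformers import AutoConfig

# Hypothetical repo id used for illustration only.
config = AutoConfig.from_pretrained("your-org/phi3-fp16")

# With the old config this reports 4096, so prompts longer than that
# raise errors; after this change it reports the full 131072 length.
print(config.max_position_embeddings)
```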

#2
Files changed (1)
  1. config.json +1 -1
config.json CHANGED
@@ -20,7 +20,7 @@
  "intermediate_size": 8192,
  "interpolate_factor": 1,
  "lm_head_bias": false,
- "max_position_embeddings": 4096,
+ "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "phi3",
  "num_attention_heads": 24,