It looks like there is an incorrect limit on the model's context length. The fp16 variant, like the original model, has a 131072-token context length. Updating this value resolved errors when processing longer prompts.
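A minimal sketch of how the limit surfaces, assuming a hypothetical repo id `your-org/phi3-fp16` (not the actual repository this PR targets):

```python
from transformers import AutoConfig

# Hypothetical repo id used for illustration only.
config = AutoConfig.from_pretrained("your-org/phi3-fp16")

# With the old config this reports 4096, so prompts longer than that
# raise errors; after this change it reports the full 131072 length.
print(config.max_position_embeddings)
```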

#2
Files changed (1)
  1. config.json +1 -1
config.json CHANGED
@@ -20,7 +20,7 @@
  "intermediate_size": 8192,
  "interpolate_factor": 1,
  "lm_head_bias": false,
- "max_position_embeddings": 4096,
+ "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "phi3",
  "num_attention_heads": 24,