GGUF
conversational

buggy output, repeating 'GGGGGGGG'

#1
by xceptor - opened

I am running the model on a llama.cpp-hosted server, and I am getting a buggy response. Please check the request and response below:

root@fb31ad54559f:/app# curl -X POST http://localhost:8080/completion   -H "Content-Type: application/json"   -d '{
    "prompt": "<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello<|im_end|><|im_assistant|>assistant<|im_middle|>Hi there<|im_end|><|im_user|>user<|im_middle|>How are you?<|im_end|><|im_assistant|>assistant<|im_middle|>",
    "n_predict": 128,
    "temperature": 0.7,
    "stop": ["<|im_end|>"],
    "stream": false
  }'
{"index":0,"content":"GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG","tokens":[],"id_slot":0,"stop":true,"model":"gpt-3.5-turbo","tokens_predicted":128,"tokens_evaluated":31,"generation_settings":{"n_predict":128,"seed":4294967295,"temperature":0.699999988079071,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"top_k":40,"top_p":0.949999988079071,"min_p":0.05000000074505806,"top_n_sigma":-1.0,"xtc_probability":0.0,"xtc_threshold":0.10000000149011612,"typical_p":1.0,"repeat_last_n":64,"repeat_penalty":1.0,"presence_penalty":0.0,"frequency_penalty":0.0,"dry_multiplier":0.0,"dry_base":1.75,"dry_allowed_length":2,"dry_penalty_last_n":64000,"dry_sequence_breakers":["\n",":","\"","*"],"mirostat":0,"mirostat_tau":5.0,"mirostat_eta":0.10000000149011612,"stop":["<|im_end|>"],"max_tokens":128,"n_keep":0,"n_discard":0,"ignore_eos":false,"stream":false,"logit_bias":[],"n_probs":0,"min_keep":0,"grammar":"","grammar_lazy":false,"grammar_triggers":[],"preserved_tokens":[],"chat_format":"Content-only","reasoning_format":"deepseek","reasoning_in_content":false,"thinking_forced_open":false,"samplers":["penalties","dry","top_n_sigma","top_k","typ_p","top_p","min_p","xtc","temperature"],"speculative.n_max":16,"speculative.n_min":0,"speculative.p_min":0.75,"timings_per_token":false,"post_sampling_probs":false,"lora":[]},"prompt":"<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello<|im_end|><|im_assistant|>assistant<|im_middle|>Hi there<|im_end|><|im_user|>user<|im_middle|>How are 
you?<|im_end|><|im_assistant|>assistant<|im_middle|>","has_new_line":false,"truncated":false,"stop_type":"limit","stopping_word":"","tokens_cached":158,"timings":{"prompt_n":31,"prompt_ms":1089.614,"prompt_per_token_ms":35.14883870967742,"prompt_per_second":28.450442083159725,"predicted_n":128,"predicted_ms":18273.078,"predicted_per_token_ms":142.758421875,"predicted_per_second":7.004840673257127}}

I also checked with the OpenAI API, and it gives the same output: GGGGGG...

I am using a 2-socket SPR CPU system.

Sign up or log in to comment