Inference API Issue

#26
by falcongoldman - opened

import requests
import json

url = "https://router.huggingface.co/v1/chat/completions"

payload = json.dumps({
    "messages": [
        {
            "role": "user",
            "content": "How many G in huggingface?"
        }
    ],
    "model": "HuggingFaceTB/SmolLM3-3B:hf-inference",
    "stream": False
})
headers = {
    "Authorization": "Bearer TOKEN",
    "Content-Type": "application/json"
}

response = requests.post(url, headers=headers, data=payload)

print(response.text)

I am getting a 404 Not Found error.
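For anyone else debugging this: since the router exposes an OpenAI-compatible API, it presumably also serves a /v1/models listing route (an assumption, not confirmed here). Checking whether the model actually appears there would tell you if the 404 means "model not served by this provider" rather than "wrong URL". A minimal sketch, reusing the same placeholder token:

import requests

# Assumed route: /v1/models in the OpenAI-compatible API shape.
# If the model ID is missing from the listing, the 404 is about the
# model/provider pair, not the chat/completions path itself.
response = requests.get(
    "https://router.huggingface.co/v1/models",
    headers={"Authorization": "Bearer TOKEN"},
)
print(response.status_code)
if response.ok:
    ids = [m["id"] for m in response.json().get("data", [])]
    print([i for i in ids if "SmolLM3" in i])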

I get a 404 here as well. I'm not sure whether the InferenceClient below hits the same REST API, but I presume it does. At first I thought I was doing something wrong, but now it seems plausible that there is a bigger issue:

from os import getenv
from huggingface_hub import InferenceClient

API_KEY = getenv("HF_TOKEN")

def ask_llm(prompt):
    client = InferenceClient(
        model="HuggingFaceTB/SmolLM3-3B",
        token=API_KEY,
    )

    completion = client.text_generation(
        prompt=prompt,
        max_new_tokens=500,
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.1,
        top_k=40,
        return_full_text=True,
    )

    # SmolLM3 emits a reasoning block terminated by </think>; split it off
    # and fail loudly (with the raw output) if the marker is missing.
    x = completion.split("</think>")
    if len(x) < 2:
        raise ValueError(f"LLM API returned a malformed response: {x}")

    return x
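One thing worth trying while this gets investigated, assuming the provider still serves the model through the chat route: InferenceClient also has a chat_completion method, which targets the chat completions endpoint instead of the raw text-generation one. A minimal sketch, with the prompt and sampling values carried over from above:

from os import getenv
from huggingface_hub import InferenceClient

# Same model, but through the chat completions route. Whether this avoids
# the 404 depends on which routes the provider actually serves.
client = InferenceClient(model="HuggingFaceTB/SmolLM3-3B", token=getenv("HF_TOKEN"))

completion = client.chat_completion(
    messages=[{"role": "user", "content": "How many G in huggingface?"}],
    max_tokens=500,
    temperature=0.6,
    top_p=0.95,
)
print(completion.choices[0].message.content)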
