Inference API Issue #26
opened by falcongoldman
```python
import requests
import json

url = "https://router.huggingface.co/v1/chat/completions"

# OpenAI-style chat completion request, routed to the hf-inference provider
payload = json.dumps({
    "messages": [
        {
            "role": "user",
            "content": "How many G in huggingface?"
        }
    ],
    "model": "HuggingFaceTB/SmolLM3-3B:hf-inference",
    "stream": False
})
headers = {
    "Authorization": "Bearer TOKEN",  # personal access token
    "Content-Type": "application/json"
}

response = requests.post(url, headers=headers, data=payload)
print(response.text)
```
I am getting a 404 Not Found error.
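In case it helps with triage, here is a minimal debugging sketch. It assumes the same `TOKEN` placeholder as above, and it assumes the router mirrors the OpenAI API closely enough to expose a `GET /v1/models` listing; printing the 404 body and the model list should show whether the model/provider pair is known to the router at all:

```python
import requests

# Assumption: same placeholder token as in the snippet above.
headers = {"Authorization": "Bearer TOKEN"}

# Re-send the failing request and print the status plus the error body,
# which may name the unknown model or provider.
resp = requests.post(
    "https://router.huggingface.co/v1/chat/completions",
    headers={**headers, "Content-Type": "application/json"},
    json={
        "model": "HuggingFaceTB/SmolLM3-3B:hf-inference",
        "messages": [{"role": "user", "content": "ping"}],
    },
)
print(resp.status_code, resp.text)

# Assumption: the router exposes an OpenAI-compatible model listing;
# check whether SmolLM3-3B appears in it.
models = requests.get("https://router.huggingface.co/v1/models", headers=headers)
print(models.status_code)
for m in models.json().get("data", []):
    print(m.get("id"))
```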
I get a 404 here as well. I'm not sure whether this client hits the same REST API, but I presume so. At first I thought I was doing something wrong, but now it seems plausible that there is a bigger issue:
```python
from os import getenv

from huggingface_hub import InferenceClient

API_KEY = getenv("HF_TOKEN")


def ask_llm(prompt):
    client = InferenceClient(
        model="HuggingFaceTB/SmolLM3-3B",
        token=API_KEY,
    )
    completion = client.text_generation(
        prompt=prompt,
        max_new_tokens=500,
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.1,
        top_k=40,
        return_full_text=True,
    )
    # SmolLM3 emits its reasoning inside a <think>...</think> block;
    # split on the closing tag to separate the reasoning from the answer.
    x = completion.split("</think>")
    if len(x) < 2:
        raise ValueError(f"LLM API returned a malformed response: {completion}")
    print(x)
    return x
```
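One thing worth ruling out: the hf-inference provider may only serve SmolLM3-3B as a conversational model, in which case the `text_generation` task would 404 even while the model itself is up. Below is a minimal sketch of the same call through the chat-completion API instead; the `provider` argument (available in recent `huggingface_hub` versions) and the deployment status are assumptions on my part:

```python
from os import getenv

from huggingface_hub import InferenceClient

# Assumption: routing explicitly through the hf-inference provider,
# matching the ":hf-inference" suffix used in the REST example above.
client = InferenceClient(provider="hf-inference", token=getenv("HF_TOKEN"))

response = client.chat_completion(
    model="HuggingFaceTB/SmolLM3-3B",
    messages=[{"role": "user", "content": "How many G in huggingface?"}],
    max_tokens=500,
    temperature=0.6,
    top_p=0.95,
)
print(response.choices[0].message.content)
```

If this chat call succeeds while `text_generation` still 404s, that would point to a task mismatch rather than an outage.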