Fix linting
Browse files
lightrag/api/routers/ollama_api.py
CHANGED
@@ -118,20 +118,22 @@ class OllamaPsResponse(BaseModel):
|
|
118 |
models: List[OllamaRunningModel]
|
119 |
|
120 |
|
121 |
-
async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
|
|
|
|
|
122 |
"""
|
123 |
Parse request body based on Content-Type header.
|
124 |
Supports both application/json and application/octet-stream.
|
125 |
-
|
126 |
Args:
|
127 |
request: The FastAPI Request object
|
128 |
model_class: The Pydantic model class to parse the request into
|
129 |
-
|
130 |
Returns:
|
131 |
An instance of the provided model_class
|
132 |
"""
|
133 |
content_type = request.headers.get("content-type", "").lower()
|
134 |
-
|
135 |
try:
|
136 |
if content_type.startswith("application/json"):
|
137 |
# FastAPI already handles JSON parsing for us
|
@@ -139,23 +141,19 @@ async def parse_request_body(request: Request, model_class: Type[BaseModel]) ->
|
|
139 |
elif content_type.startswith("application/octet-stream"):
|
140 |
# Manually parse octet-stream as JSON
|
141 |
body_bytes = await request.body()
|
142 |
-
body = json.loads(body_bytes.decode('utf-8'))
|
143 |
else:
|
144 |
# Try to parse as JSON for any other content type
|
145 |
body_bytes = await request.body()
|
146 |
-
body = json.loads(body_bytes.decode('utf-8'))
|
147 |
-
|
148 |
# Create an instance of the model
|
149 |
return model_class(**body)
|
150 |
except json.JSONDecodeError:
|
151 |
-
raise HTTPException(
|
152 |
-
status_code=400,
|
153 |
-
detail="Invalid JSON in request body"
|
154 |
-
)
|
155 |
except Exception as e:
|
156 |
raise HTTPException(
|
157 |
-
status_code=400,
|
158 |
-
detail=f"Error parsing request body: {str(e)}"
|
159 |
)
|
160 |
|
161 |
|
@@ -260,7 +258,7 @@ class OllamaAPI:
|
|
260 |
}
|
261 |
]
|
262 |
)
|
263 |
-
|
264 |
@self.router.get("/ps", dependencies=[Depends(combined_auth)])
|
265 |
async def get_running_models():
|
266 |
"""List Running Models - returns currently running models"""
|
@@ -275,19 +273,19 @@ class OllamaAPI:
|
|
275 |
"parent_model": "",
|
276 |
"format": "gguf",
|
277 |
"family": "llama",
|
278 |
-
"families": [
|
279 |
-
"llama"
|
280 |
-
],
|
281 |
"parameter_size": "7.2B",
|
282 |
-
"quantization_level": "Q4_0"
|
283 |
},
|
284 |
"expires_at": "2050-12-31T14:38:31.83753-07:00",
|
285 |
-
"size_vram": self.ollama_server_infos.LIGHTRAG_SIZE
|
286 |
}
|
287 |
]
|
288 |
)
|
289 |
|
290 |
-
@self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
|
|
|
|
|
291 |
async def generate(raw_request: Request):
|
292 |
"""Handle generate completion requests acting as an Ollama model
|
293 |
For compatibility purpose, the request is not processed by LightRAG,
|
@@ -297,7 +295,7 @@ class OllamaAPI:
|
|
297 |
try:
|
298 |
# Parse the request body manually
|
299 |
request = await parse_request_body(raw_request, OllamaGenerateRequest)
|
300 |
-
|
301 |
query = request.prompt
|
302 |
start_time = time.time_ns()
|
303 |
prompt_tokens = estimate_tokens(query)
|
@@ -457,7 +455,9 @@ class OllamaAPI:
|
|
457 |
trace_exception(e)
|
458 |
raise HTTPException(status_code=500, detail=str(e))
|
459 |
|
460 |
-
@self.router.post("/chat", dependencies=[Depends(combined_auth)], include_in_schema=True)
|
|
|
|
|
461 |
async def chat(raw_request: Request):
|
462 |
"""Process chat completion requests acting as an Ollama model
|
463 |
Routes user queries through LightRAG by selecting query mode based on prefix indicators.
|
@@ -467,7 +467,7 @@ class OllamaAPI:
|
|
467 |
try:
|
468 |
# Parse the request body manually
|
469 |
request = await parse_request_body(raw_request, OllamaChatRequest)
|
470 |
-
|
471 |
# Get all messages
|
472 |
messages = request.messages
|
473 |
if not messages:
|
|
|
118 |
models: List[OllamaRunningModel]
|
119 |
|
120 |
|
121 |
+
async def parse_request_body(
|
122 |
+
request: Request, model_class: Type[BaseModel]
|
123 |
+
) -> BaseModel:
|
124 |
"""
|
125 |
Parse request body based on Content-Type header.
|
126 |
Supports both application/json and application/octet-stream.
|
127 |
+
|
128 |
Args:
|
129 |
request: The FastAPI Request object
|
130 |
model_class: The Pydantic model class to parse the request into
|
131 |
+
|
132 |
Returns:
|
133 |
An instance of the provided model_class
|
134 |
"""
|
135 |
content_type = request.headers.get("content-type", "").lower()
|
136 |
+
|
137 |
try:
|
138 |
if content_type.startswith("application/json"):
|
139 |
# FastAPI already handles JSON parsing for us
|
|
|
141 |
elif content_type.startswith("application/octet-stream"):
|
142 |
# Manually parse octet-stream as JSON
|
143 |
body_bytes = await request.body()
|
144 |
+
body = json.loads(body_bytes.decode("utf-8"))
|
145 |
else:
|
146 |
# Try to parse as JSON for any other content type
|
147 |
body_bytes = await request.body()
|
148 |
+
body = json.loads(body_bytes.decode("utf-8"))
|
149 |
+
|
150 |
# Create an instance of the model
|
151 |
return model_class(**body)
|
152 |
except json.JSONDecodeError:
|
153 |
+
raise HTTPException(status_code=400, detail="Invalid JSON in request body")
|
|
|
|
|
|
|
154 |
except Exception as e:
|
155 |
raise HTTPException(
|
156 |
+
status_code=400, detail=f"Error parsing request body: {str(e)}"
|
|
|
157 |
)
|
158 |
|
159 |
|
|
|
258 |
}
|
259 |
]
|
260 |
)
|
261 |
+
|
262 |
@self.router.get("/ps", dependencies=[Depends(combined_auth)])
|
263 |
async def get_running_models():
|
264 |
"""List Running Models - returns currently running models"""
|
|
|
273 |
"parent_model": "",
|
274 |
"format": "gguf",
|
275 |
"family": "llama",
|
276 |
+
"families": ["llama"],
|
|
|
|
|
277 |
"parameter_size": "7.2B",
|
278 |
+
"quantization_level": "Q4_0",
|
279 |
},
|
280 |
"expires_at": "2050-12-31T14:38:31.83753-07:00",
|
281 |
+
"size_vram": self.ollama_server_infos.LIGHTRAG_SIZE,
|
282 |
}
|
283 |
]
|
284 |
)
|
285 |
|
286 |
+
@self.router.post(
|
287 |
+
"/generate", dependencies=[Depends(combined_auth)], include_in_schema=True
|
288 |
+
)
|
289 |
async def generate(raw_request: Request):
|
290 |
"""Handle generate completion requests acting as an Ollama model
|
291 |
For compatibility purpose, the request is not processed by LightRAG,
|
|
|
295 |
try:
|
296 |
# Parse the request body manually
|
297 |
request = await parse_request_body(raw_request, OllamaGenerateRequest)
|
298 |
+
|
299 |
query = request.prompt
|
300 |
start_time = time.time_ns()
|
301 |
prompt_tokens = estimate_tokens(query)
|
|
|
455 |
trace_exception(e)
|
456 |
raise HTTPException(status_code=500, detail=str(e))
|
457 |
|
458 |
+
@self.router.post(
|
459 |
+
"/chat", dependencies=[Depends(combined_auth)], include_in_schema=True
|
460 |
+
)
|
461 |
async def chat(raw_request: Request):
|
462 |
"""Process chat completion requests acting as an Ollama model
|
463 |
Routes user queries through LightRAG by selecting query mode based on prefix indicators.
|
|
|
467 |
try:
|
468 |
# Parse the request body manually
|
469 |
request = await parse_request_body(raw_request, OllamaChatRequest)
|
470 |
+
|
471 |
# Get all messages
|
472 |
messages = request.messages
|
473 |
if not messages:
|