feat(api): add /ps endpoint to list running models for Ollama API
Browse files
lightrag/api/routers/ollama_api.py
CHANGED
@@ -95,6 +95,29 @@ class OllamaTagResponse(BaseModel):
|
|
95 |
models: List[OllamaModel]
|
96 |
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
|
99 |
"""
|
100 |
Parse request body based on Content-Type header.
|
@@ -237,6 +260,32 @@ class OllamaAPI:
|
|
237 |
}
|
238 |
]
|
239 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
|
241 |
@self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
|
242 |
async def generate(raw_request: Request):
|
|
|
95 |
models: List[OllamaModel]
|
96 |
|
97 |
|
98 |
+
class OllamaRunningModelDetails(BaseModel):
    """Model metadata sub-object for a running model, mirroring the shape of
    Ollama's ``/api/ps`` response (``details`` field)."""

    parent_model: str  # base model this was derived from; "" when none
    format: str  # model file format identifier, e.g. "gguf"
    family: str  # primary model architecture family
    families: List[str]  # all architecture families the model belongs to
    parameter_size: str  # human-readable parameter count, e.g. "7.2B"
    quantization_level: str  # quantization scheme label, e.g. "Q4_0"
105 |
+
|
106 |
+
|
107 |
+
class OllamaRunningModel(BaseModel):
    """One entry in the running-models list, matching Ollama's ``/api/ps``
    per-model object."""

    name: str  # display name of the model
    model: str  # model identifier (same value as name in this emulation)
    size: int  # reported model size
    digest: str  # model content digest string
    details: OllamaRunningModelDetails  # nested metadata block
    expires_at: str  # timestamp string for when the model would be unloaded
    size_vram: int  # reported VRAM footprint
115 |
+
|
116 |
+
|
117 |
+
class OllamaPsResponse(BaseModel):
    """Top-level response body for GET ``/ps`` (list running models)."""

    models: List[OllamaRunningModel]  # currently "running" models (one fixed entry here)
119 |
+
|
120 |
+
|
121 |
async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
|
122 |
"""
|
123 |
Parse request body based on Content-Type header.
|
|
|
260 |
}
|
261 |
]
|
262 |
)
|
263 |
+
|
264 |
+
@self.router.get("/ps", dependencies=[Depends(combined_auth)])
|
265 |
+
async def get_running_models():
|
266 |
+
"""List Running Models - returns currently running models"""
|
267 |
+
return OllamaPsResponse(
|
268 |
+
models=[
|
269 |
+
{
|
270 |
+
"name": self.ollama_server_infos.LIGHTRAG_MODEL,
|
271 |
+
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
|
272 |
+
"size": self.ollama_server_infos.LIGHTRAG_SIZE,
|
273 |
+
"digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
|
274 |
+
"details": {
|
275 |
+
"parent_model": "",
|
276 |
+
"format": "gguf",
|
277 |
+
"family": "llama",
|
278 |
+
"families": [
|
279 |
+
"llama"
|
280 |
+
],
|
281 |
+
"parameter_size": "7.2B",
|
282 |
+
"quantization_level": "Q4_0"
|
283 |
+
},
|
284 |
+
"expires_at": "2050-12-31T14:38:31.83753-07:00",
|
285 |
+
"size_vram": self.ollama_server_infos.LIGHTRAG_SIZE
|
286 |
+
}
|
287 |
+
]
|
288 |
+
)
|
289 |
|
290 |
@self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
|
291 |
async def generate(raw_request: Request):
|