Merge pull request #666 from ParisNeo/main
Added the possibility to change the name of the model on the Ollama service.
- lightrag/api/lightrag_server.py (+76 −60)

lightrag/api/lightrag_server.py (CHANGED)
@@ -48,18 +48,23 @@ def estimate_tokens(text: str) -> int:
     return int(tokens)


-# Constants for emulated Ollama model information
-LIGHTRAG_NAME = "lightrag"
-LIGHTRAG_TAG = os.getenv("OLLAMA_EMULATING_MODEL_TAG", "latest")
-LIGHTRAG_MODEL = f"{LIGHTRAG_NAME}:{LIGHTRAG_TAG}"
-LIGHTRAG_SIZE = 7365960935  # it's a dummy value
-LIGHTRAG_CREATED_AT = "2024-01-15T00:00:00Z"
-LIGHTRAG_DIGEST = "sha256:lightrag"
-
-KV_STORAGE = "JsonKVStorage"
-DOC_STATUS_STORAGE = "JsonDocStatusStorage"
-GRAPH_STORAGE = "NetworkXStorage"
-VECTOR_STORAGE = "NanoVectorDBStorage"
+class OllamaServerInfos:
+    # Constants for emulated Ollama model information
+    LIGHTRAG_NAME = "lightrag"
+    LIGHTRAG_TAG = os.getenv("OLLAMA_EMULATING_MODEL_TAG", "latest")
+    LIGHTRAG_MODEL = f"{LIGHTRAG_NAME}:{LIGHTRAG_TAG}"
+    LIGHTRAG_SIZE = 7365960935  # it's a dummy value
+    LIGHTRAG_CREATED_AT = "2024-01-15T00:00:00Z"
+    LIGHTRAG_DIGEST = "sha256:lightrag"
+
+    KV_STORAGE = "JsonKVStorage"
+    DOC_STATUS_STORAGE = "JsonDocStatusStorage"
+    GRAPH_STORAGE = "NetworkXStorage"
+    VECTOR_STORAGE = "NanoVectorDBStorage"
+
+
+# Add infos
+ollama_server_infos = OllamaServerInfos()

 # read config.ini
 config = configparser.ConfigParser()
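The hunk above gathers the former module-level constants into a single OllamaServerInfos holder; the emulated model tag still comes from the OLLAMA_EMULATING_MODEL_TAG environment variable. A minimal standalone sketch of that behavior (class and attribute names copied from the diff, the print line is illustrative):

    import os

    # Attribute defaults are evaluated once, when the class body executes,
    # so the environment variable must be set before the module is imported.
    class OllamaServerInfos:
        LIGHTRAG_NAME = "lightrag"
        LIGHTRAG_TAG = os.getenv("OLLAMA_EMULATING_MODEL_TAG", "latest")
        LIGHTRAG_MODEL = f"{LIGHTRAG_NAME}:{LIGHTRAG_TAG}"

    print(OllamaServerInfos().LIGHTRAG_MODEL)  # "lightrag:latest" unless the env var is set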
@@ -68,8 +73,8 @@ config.read("config.ini", "utf-8")
 redis_uri = config.get("redis", "uri", fallback=None)
 if redis_uri:
     os.environ["REDIS_URI"] = redis_uri
-    KV_STORAGE = "RedisKVStorage"
-    DOC_STATUS_STORAGE = "RedisKVStorage"
+    ollama_server_infos.KV_STORAGE = "RedisKVStorage"
+    ollama_server_infos.DOC_STATUS_STORAGE = "RedisKVStorage"

 # Neo4j config
 neo4j_uri = config.get("neo4j", "uri", fallback=None)
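As in the surrounding hunks, each optional backend section in config.ini now flips the corresponding attribute on ollama_server_infos instead of rebinding a global. A small sketch of the selection pattern, assuming only the [redis] section shown in the diff (the inline config string is illustrative):

    import configparser

    config = configparser.ConfigParser()
    config.read_string("""
    [redis]
    uri = redis://localhost:6379
    """)

    # Mirrors the diff: a present uri switches the KV backends to Redis,
    # otherwise the JSON defaults from OllamaServerInfos stay in effect.
    kv_storage = "JsonKVStorage"
    redis_uri = config.get("redis", "uri", fallback=None)
    if redis_uri:
        kv_storage = "RedisKVStorage"
    print(kv_storage)  # RedisKVStorage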
@@ -79,7 +84,7 @@ if neo4j_uri:
     os.environ["NEO4J_URI"] = neo4j_uri
     os.environ["NEO4J_USERNAME"] = neo4j_username
     os.environ["NEO4J_PASSWORD"] = neo4j_password
-    GRAPH_STORAGE = "Neo4JStorage"
+    ollama_server_infos.GRAPH_STORAGE = "Neo4JStorage"

 # Milvus config
 milvus_uri = config.get("milvus", "uri", fallback=None)
@@ -91,7 +96,7 @@ if milvus_uri:
     os.environ["MILVUS_USER"] = milvus_user
     os.environ["MILVUS_PASSWORD"] = milvus_password
     os.environ["MILVUS_DB_NAME"] = milvus_db_name
-    VECTOR_STORAGE = "MilvusVectorDBStorge"
+    ollama_server_infos.VECTOR_STORAGE = "MilvusVectorDBStorge"

 # MongoDB config
 mongo_uri = config.get("mongodb", "uri", fallback=None)
@@ -99,8 +104,8 @@ mongo_database = config.get("mongodb", "LightRAG", fallback=None)
 if mongo_uri:
     os.environ["MONGO_URI"] = mongo_uri
     os.environ["MONGO_DATABASE"] = mongo_database
-    KV_STORAGE = "MongoKVStorage"
-    DOC_STATUS_STORAGE = "MongoKVStorage"
+    ollama_server_infos.KV_STORAGE = "MongoKVStorage"
+    ollama_server_infos.DOC_STATUS_STORAGE = "MongoKVStorage"


 def get_default_host(binding_type: str) -> str:
@@ -213,7 +218,7 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     # System Configuration
     ASCIIColors.magenta("\n🛠️ System Configuration:")
     ASCIIColors.white("    ├─ Ollama Emulating Model: ", end="")
-    ASCIIColors.yellow(f"{LIGHTRAG_MODEL}")
+    ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
     ASCIIColors.white("    ├─ Log Level: ", end="")
     ASCIIColors.yellow(f"{args.log_level}")
     ASCIIColors.white("    ├─ Timeout: ", end="")
@@ -484,8 +489,19 @@ def parse_args() -> argparse.Namespace:
         help="Number of conversation history turns to include (default: from env or 3)",
     )

+    parser.add_argument(
+        "--simulated-model-name",
+        type=str,
+        default=get_env_value(
+            "SIMULATED_MODEL_NAME", ollama_server_infos.LIGHTRAG_MODEL
+        ),
+        help="Number of conversation history turns to include (default: from env or 3)",
+    )
+
     args = parser.parse_args()

+    ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
+
     return args

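The new --simulated-model-name flag defaults to the SIMULATED_MODEL_NAME environment variable (via the server's own get_env_value helper) and falls back to the built-in name; whichever wins is written back onto ollama_server_infos after parsing. A hedged sketch of that precedence, with os.getenv standing in for get_env_value:

    import argparse
    import os

    default_model = "lightrag:latest"  # stand-in for ollama_server_infos.LIGHTRAG_MODEL

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--simulated-model-name",
        type=str,
        default=os.getenv("SIMULATED_MODEL_NAME", default_model),
    )

    # A CLI value takes precedence over the environment variable and the default.
    args = parser.parse_args(["--simulated-model-name", "my-rag:demo"])
    assert args.simulated_model_name == "my-rag:demo"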
@@ -538,7 +554,7 @@ class OllamaMessage(BaseModel):


 class OllamaChatRequest(BaseModel):
-    model: str = LIGHTRAG_MODEL
+    model: str = ollama_server_infos.LIGHTRAG_MODEL
     messages: List[OllamaMessage]
     stream: bool = True  # Default to streaming mode
     options: Optional[Dict[str, Any]] = None
@@ -553,7 +569,7 @@ class OllamaChatResponse(BaseModel):


 class OllamaGenerateRequest(BaseModel):
-    model: str = LIGHTRAG_MODEL
+    model: str = ollama_server_infos.LIGHTRAG_MODEL
     prompt: str
     system: Optional[str] = None
     stream: bool = False
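One thing worth noting about the two request models above: a Pydantic field default such as model: str = ollama_server_infos.LIGHTRAG_MODEL is captured when the class is defined, not when a request arrives, so renames applied before the module's classes are created take effect here, while later renames affect only code that reads the attribute at call time. The snippet below demonstrates this general Python/Pydantic behavior in isolation (names are illustrative):

    from pydantic import BaseModel

    NAME = "lightrag:latest"

    class Req(BaseModel):
        model: str = NAME  # default captured here, at class-definition time

    NAME = "renamed:latest"  # later reassignment does not touch the default
    print(Req().model)       # still "lightrag:latest"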
@@ -842,10 +858,10 @@ def create_app(args):
             if args.llm_binding == "lollms" or args.llm_binding == "ollama"
             else {},
             embedding_func=embedding_func,
-            kv_storage=KV_STORAGE,
-            graph_storage=GRAPH_STORAGE,
-            vector_storage=VECTOR_STORAGE,
-            doc_status_storage=DOC_STATUS_STORAGE,
+            kv_storage=ollama_server_infos.KV_STORAGE,
+            graph_storage=ollama_server_infos.GRAPH_STORAGE,
+            vector_storage=ollama_server_infos.VECTOR_STORAGE,
+            doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
         )
     else:
         rag = LightRAG(
@@ -859,10 +875,10 @@ def create_app(args):
             llm_model_max_async=args.max_async,
             llm_model_max_token_size=args.max_tokens,
             embedding_func=embedding_func,
-            kv_storage=KV_STORAGE,
-            graph_storage=GRAPH_STORAGE,
-            vector_storage=VECTOR_STORAGE,
-            doc_status_storage=DOC_STATUS_STORAGE,
+            kv_storage=ollama_server_infos.KV_STORAGE,
+            graph_storage=ollama_server_infos.GRAPH_STORAGE,
+            vector_storage=ollama_server_infos.VECTOR_STORAGE,
+            doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
         )

     async def index_file(file_path: Union[str, Path]) -> None:
@@ -1423,16 +1439,16 @@ def create_app(args):
         return OllamaTagResponse(
             models=[
                 {
-                    "name": LIGHTRAG_MODEL,
-                    "model": LIGHTRAG_MODEL,
-                    "size": LIGHTRAG_SIZE,
-                    "digest": LIGHTRAG_DIGEST,
-                    "modified_at": LIGHTRAG_CREATED_AT,
+                    "name": ollama_server_infos.LIGHTRAG_MODEL,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "size": ollama_server_infos.LIGHTRAG_SIZE,
+                    "digest": ollama_server_infos.LIGHTRAG_DIGEST,
+                    "modified_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "details": {
                         "parent_model": "",
                         "format": "gguf",
-                        "family": LIGHTRAG_NAME,
-                        "families": [LIGHTRAG_NAME],
+                        "family": ollama_server_infos.LIGHTRAG_NAME,
+                        "families": [ollama_server_infos.LIGHTRAG_NAME],
                         "parameter_size": "13B",
                         "quantization_level": "Q4_0",
                     },
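With this change, the name advertised by the emulated Ollama tags endpoint follows the configured model name. Assuming a server is running locally (the host and port below are illustrative, not taken from this diff), it can be checked like so:

    import json
    import urllib.request

    with urllib.request.urlopen("http://localhost:9621/api/tags") as resp:
        tags = json.load(resp)

    # Prints the emulated model name, e.g. "lightrag:latest" or whatever
    # --simulated-model-name / SIMULATED_MODEL_NAME was set to.
    print(tags["models"][0]["name"])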
@@ -1495,8 +1511,8 @@ def create_app(args):
                 total_response = response

                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "response": response,
                     "done": False,
                 }
@@ -1508,8 +1524,8 @@ def create_app(args):
                 eval_time = last_chunk_time - first_chunk_time

                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "done": True,
                     "total_duration": total_time,
                     "load_duration": 0,
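The streaming hunks in this region all follow the same two payload shapes: intermediate chunks carry response/done: False, and the final chunk carries done: True plus timing fields, each now tagged with the configurable model name. A sketch with illustrative values:

    import json

    chunk = {
        "model": "lightrag:latest",           # ollama_server_infos.LIGHTRAG_MODEL
        "created_at": "2024-01-15T00:00:00Z",
        "response": "partial text",
        "done": False,
    }
    final = {
        "model": "lightrag:latest",
        "created_at": "2024-01-15T00:00:00Z",
        "done": True,
        "total_duration": 123_456_789,  # illustrative; Ollama reports durations in nanoseconds
        "load_duration": 0,
    }
    print(json.dumps(chunk))
    print(json.dumps(final))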
@@ -1529,8 +1545,8 @@ def create_app(args):

                     total_response += chunk
                     data = {
-                        "model": LIGHTRAG_MODEL,
-                        "created_at": LIGHTRAG_CREATED_AT,
+                        "model": ollama_server_infos.LIGHTRAG_MODEL,
+                        "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                         "response": chunk,
                         "done": False,
                     }
@@ -1542,8 +1558,8 @@ def create_app(args):
                     eval_time = last_chunk_time - first_chunk_time

                     data = {
-                        "model": LIGHTRAG_MODEL,
-                        "created_at": LIGHTRAG_CREATED_AT,
+                        "model": ollama_server_infos.LIGHTRAG_MODEL,
+                        "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                         "done": True,
                         "total_duration": total_time,
                         "load_duration": 0,
@@ -1587,8 +1603,8 @@ def create_app(args):
             eval_time = last_chunk_time - first_chunk_time

             return {
-                "model": LIGHTRAG_MODEL,
-                "created_at": LIGHTRAG_CREATED_AT,
+                "model": ollama_server_infos.LIGHTRAG_MODEL,
+                "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                 "response": str(response_text),
                 "done": True,
                 "total_duration": total_time,
@@ -1660,8 +1676,8 @@ def create_app(args):
                 total_response = response

                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "message": {
                         "role": "assistant",
                         "content": response,
@@ -1677,8 +1693,8 @@ def create_app(args):
                 eval_time = last_chunk_time - first_chunk_time

                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "done": True,
                     "total_duration": total_time,
                     "load_duration": 0,
@@ -1698,8 +1714,8 @@ def create_app(args):

                     total_response += chunk
                     data = {
-                        "model": LIGHTRAG_MODEL,
-                        "created_at": LIGHTRAG_CREATED_AT,
+                        "model": ollama_server_infos.LIGHTRAG_MODEL,
+                        "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                         "message": {
                             "role": "assistant",
                             "content": chunk,
@@ -1715,8 +1731,8 @@ def create_app(args):
                     eval_time = last_chunk_time - first_chunk_time

                     data = {
-                        "model": LIGHTRAG_MODEL,
-                        "created_at": LIGHTRAG_CREATED_AT,
+                        "model": ollama_server_infos.LIGHTRAG_MODEL,
+                        "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                         "done": True,
                         "total_duration": total_time,
                         "load_duration": 0,
@@ -1771,8 +1787,8 @@ def create_app(args):
             eval_time = last_chunk_time - first_chunk_time

             return {
-                "model": LIGHTRAG_MODEL,
-                "created_at": LIGHTRAG_CREATED_AT,
+                "model": ollama_server_infos.LIGHTRAG_MODEL,
+                "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                 "message": {
                     "role": "assistant",
                     "content": str(response_text),
@@ -1815,10 +1831,10 @@ def create_app(args):
             "embedding_binding_host": args.embedding_binding_host,
             "embedding_model": args.embedding_model,
             "max_tokens": args.max_tokens,
-            "kv_storage": KV_STORAGE,
-            "doc_status_storage": DOC_STATUS_STORAGE,
-            "graph_storage": GRAPH_STORAGE,
-            "vector_storage": VECTOR_STORAGE,
+            "kv_storage": ollama_server_infos.KV_STORAGE,
+            "doc_status_storage": ollama_server_infos.DOC_STATUS_STORAGE,
+            "graph_storage": ollama_server_infos.GRAPH_STORAGE,
+            "vector_storage": ollama_server_infos.VECTOR_STORAGE,
         },
     }
