zrguo committed
Commit d041c62 Β· unverified Β· 2 parents: df6ae9c 7f55f0c

Merge pull request #666 from ParisNeo/main


Added the ability to change the name of the emulated model on the Ollama service.

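In practice, the renamed model is what every emulated Ollama endpoint reports. A quick way to verify it after starting the server (a sketch: the host and port are assumptions and depend on how the server was launched):

    # Query the emulated /api/tags endpoint and print the advertised model name.
    # http://localhost:9621 is an assumed address; substitute your own.
    import json
    import urllib.request

    with urllib.request.urlopen("http://localhost:9621/api/tags") as resp:
        tags = json.load(resp)
    print(tags["models"][0]["name"])  # e.g. "my-model:latest"
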
Files changed (1)
  1. lightrag/api/lightrag_server.py +76 -60
lightrag/api/lightrag_server.py CHANGED
@@ -48,18 +48,23 @@ def estimate_tokens(text: str) -> int:
     return int(tokens)
 
 
-# Constants for emulated Ollama model information
-LIGHTRAG_NAME = "lightrag"
-LIGHTRAG_TAG = os.getenv("OLLAMA_EMULATING_MODEL_TAG", "latest")
-LIGHTRAG_MODEL = f"{LIGHTRAG_NAME}:{LIGHTRAG_TAG}"
-LIGHTRAG_SIZE = 7365960935  # it's a dummy value
-LIGHTRAG_CREATED_AT = "2024-01-15T00:00:00Z"
-LIGHTRAG_DIGEST = "sha256:lightrag"
-
-KV_STORAGE = "JsonKVStorage"
-DOC_STATUS_STORAGE = "JsonDocStatusStorage"
-GRAPH_STORAGE = "NetworkXStorage"
-VECTOR_STORAGE = "NanoVectorDBStorage"
+class OllamaServerInfos:
+    # Constants for emulated Ollama model information
+    LIGHTRAG_NAME = "lightrag"
+    LIGHTRAG_TAG = os.getenv("OLLAMA_EMULATING_MODEL_TAG", "latest")
+    LIGHTRAG_MODEL = f"{LIGHTRAG_NAME}:{LIGHTRAG_TAG}"
+    LIGHTRAG_SIZE = 7365960935  # it's a dummy value
+    LIGHTRAG_CREATED_AT = "2024-01-15T00:00:00Z"
+    LIGHTRAG_DIGEST = "sha256:lightrag"
+
+    KV_STORAGE = "JsonKVStorage"
+    DOC_STATUS_STORAGE = "JsonDocStatusStorage"
+    GRAPH_STORAGE = "NetworkXStorage"
+    VECTOR_STORAGE = "NanoVectorDBStorage"
+
+
+# Add infos
+ollama_server_infos = OllamaServerInfos()
 
 # read config.ini
 config = configparser.ConfigParser()
@@ -68,8 +73,8 @@ config.read("config.ini", "utf-8")
 redis_uri = config.get("redis", "uri", fallback=None)
 if redis_uri:
     os.environ["REDIS_URI"] = redis_uri
-    KV_STORAGE = "RedisKVStorage"
-    DOC_STATUS_STORAGE = "RedisKVStorage"
+    ollama_server_infos.KV_STORAGE = "RedisKVStorage"
+    ollama_server_infos.DOC_STATUS_STORAGE = "RedisKVStorage"
 
 # Neo4j config
 neo4j_uri = config.get("neo4j", "uri", fallback=None)
@@ -79,7 +84,7 @@ if neo4j_uri:
     os.environ["NEO4J_URI"] = neo4j_uri
     os.environ["NEO4J_USERNAME"] = neo4j_username
     os.environ["NEO4J_PASSWORD"] = neo4j_password
-    GRAPH_STORAGE = "Neo4JStorage"
+    ollama_server_infos.GRAPH_STORAGE = "Neo4JStorage"
 
 # Milvus config
 milvus_uri = config.get("milvus", "uri", fallback=None)
@@ -91,7 +96,7 @@ if milvus_uri:
     os.environ["MILVUS_USER"] = milvus_user
     os.environ["MILVUS_PASSWORD"] = milvus_password
     os.environ["MILVUS_DB_NAME"] = milvus_db_name
-    VECTOR_STORAGE = "MilvusVectorDBStorge"
+    ollama_server_infos.VECTOR_STORAGE = "MilvusVectorDBStorge"
 
 # MongoDB config
 mongo_uri = config.get("mongodb", "uri", fallback=None)
@@ -99,8 +104,8 @@ mongo_database = config.get("mongodb", "LightRAG", fallback=None)
 if mongo_uri:
     os.environ["MONGO_URI"] = mongo_uri
     os.environ["MONGO_DATABASE"] = mongo_database
-    KV_STORAGE = "MongoKVStorage"
-    DOC_STATUS_STORAGE = "MongoKVStorage"
+    ollama_server_infos.KV_STORAGE = "MongoKVStorage"
+    ollama_server_infos.DOC_STATUS_STORAGE = "MongoKVStorage"
 
 
 def get_default_host(binding_type: str) -> str:
@@ -213,7 +218,7 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     # System Configuration
     ASCIIColors.magenta("\nπŸ› οΈ System Configuration:")
     ASCIIColors.white("    β”œβ”€ Ollama Emulating Model: ", end="")
-    ASCIIColors.yellow(f"{LIGHTRAG_MODEL}")
+    ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
     ASCIIColors.white("    β”œβ”€ Log Level: ", end="")
     ASCIIColors.yellow(f"{args.log_level}")
     ASCIIColors.white("    β”œβ”€ Timeout: ", end="")
@@ -484,8 +489,19 @@ def parse_args() -> argparse.Namespace:
         help="Number of conversation history turns to include (default: from env or 3)",
     )
 
+    parser.add_argument(
+        "--simulated-model-name",
+        type=str,
+        default=get_env_value(
+            "SIMULATED_MODEL_NAME", ollama_server_infos.LIGHTRAG_MODEL
+        ),
+        help="Name of the simulated Ollama model (default: from env or lightrag:latest)",
+    )
+
     args = parser.parse_args()
 
+    ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
+
     return args
 
 
@@ -538,7 +554,7 @@ class OllamaMessage(BaseModel):
 
 
 class OllamaChatRequest(BaseModel):
-    model: str = LIGHTRAG_MODEL
+    model: str = ollama_server_infos.LIGHTRAG_MODEL
     messages: List[OllamaMessage]
     stream: bool = True  # Default to streaming mode
     options: Optional[Dict[str, Any]] = None
@@ -553,7 +569,7 @@ class OllamaChatResponse(BaseModel):
 
 
 class OllamaGenerateRequest(BaseModel):
-    model: str = LIGHTRAG_MODEL
+    model: str = ollama_server_infos.LIGHTRAG_MODEL
     prompt: str
     system: Optional[str] = None
     stream: bool = False
@@ -842,10 +858,10 @@ def create_app(args):
             if args.llm_binding == "lollms" or args.llm_binding == "ollama"
             else {},
             embedding_func=embedding_func,
-            kv_storage=KV_STORAGE,
-            graph_storage=GRAPH_STORAGE,
-            vector_storage=VECTOR_STORAGE,
-            doc_status_storage=DOC_STATUS_STORAGE,
+            kv_storage=ollama_server_infos.KV_STORAGE,
+            graph_storage=ollama_server_infos.GRAPH_STORAGE,
+            vector_storage=ollama_server_infos.VECTOR_STORAGE,
+            doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
         )
     else:
         rag = LightRAG(
@@ -859,10 +875,10 @@ def create_app(args):
            llm_model_max_async=args.max_async,
            llm_model_max_token_size=args.max_tokens,
            embedding_func=embedding_func,
-            kv_storage=KV_STORAGE,
-            graph_storage=GRAPH_STORAGE,
-            vector_storage=VECTOR_STORAGE,
-            doc_status_storage=DOC_STATUS_STORAGE,
+            kv_storage=ollama_server_infos.KV_STORAGE,
+            graph_storage=ollama_server_infos.GRAPH_STORAGE,
+            vector_storage=ollama_server_infos.VECTOR_STORAGE,
+            doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
         )
 
     async def index_file(file_path: Union[str, Path]) -> None:
@@ -1423,16 +1439,16 @@ def create_app(args):
         return OllamaTagResponse(
             models=[
                 {
-                    "name": LIGHTRAG_MODEL,
-                    "model": LIGHTRAG_MODEL,
-                    "size": LIGHTRAG_SIZE,
-                    "digest": LIGHTRAG_DIGEST,
-                    "modified_at": LIGHTRAG_CREATED_AT,
+                    "name": ollama_server_infos.LIGHTRAG_MODEL,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "size": ollama_server_infos.LIGHTRAG_SIZE,
+                    "digest": ollama_server_infos.LIGHTRAG_DIGEST,
+                    "modified_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "details": {
                         "parent_model": "",
                         "format": "gguf",
-                        "family": LIGHTRAG_NAME,
-                        "families": [LIGHTRAG_NAME],
+                        "family": ollama_server_infos.LIGHTRAG_NAME,
+                        "families": [ollama_server_infos.LIGHTRAG_NAME],
                         "parameter_size": "13B",
                         "quantization_level": "Q4_0",
                     },
@@ -1495,8 +1511,8 @@ def create_app(args):
                 total_response = response
 
                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "response": response,
                     "done": False,
                 }
@@ -1508,8 +1524,8 @@ def create_app(args):
                 eval_time = last_chunk_time - first_chunk_time
 
                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "done": True,
                     "total_duration": total_time,
                     "load_duration": 0,
@@ -1529,8 +1545,8 @@ def create_app(args):
 
                     total_response += chunk
                     data = {
-                        "model": LIGHTRAG_MODEL,
-                        "created_at": LIGHTRAG_CREATED_AT,
+                        "model": ollama_server_infos.LIGHTRAG_MODEL,
+                        "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                         "response": chunk,
                         "done": False,
                     }
@@ -1542,8 +1558,8 @@ def create_app(args):
                 eval_time = last_chunk_time - first_chunk_time
 
                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "done": True,
                     "total_duration": total_time,
                     "load_duration": 0,
@@ -1587,8 +1603,8 @@ def create_app(args):
             eval_time = last_chunk_time - first_chunk_time
 
             return {
-                "model": LIGHTRAG_MODEL,
-                "created_at": LIGHTRAG_CREATED_AT,
+                "model": ollama_server_infos.LIGHTRAG_MODEL,
+                "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                 "response": str(response_text),
                 "done": True,
                 "total_duration": total_time,
@@ -1660,8 +1676,8 @@ def create_app(args):
                 total_response = response
 
                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "message": {
                         "role": "assistant",
                         "content": response,
@@ -1677,8 +1693,8 @@ def create_app(args):
                 eval_time = last_chunk_time - first_chunk_time
 
                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "done": True,
                     "total_duration": total_time,
                     "load_duration": 0,
@@ -1698,8 +1714,8 @@ def create_app(args):
 
                     total_response += chunk
                     data = {
-                        "model": LIGHTRAG_MODEL,
-                        "created_at": LIGHTRAG_CREATED_AT,
+                        "model": ollama_server_infos.LIGHTRAG_MODEL,
+                        "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                         "message": {
                             "role": "assistant",
                             "content": chunk,
@@ -1715,8 +1731,8 @@ def create_app(args):
                 eval_time = last_chunk_time - first_chunk_time
 
                 data = {
-                    "model": LIGHTRAG_MODEL,
-                    "created_at": LIGHTRAG_CREATED_AT,
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "done": True,
                     "total_duration": total_time,
                     "load_duration": 0,
@@ -1771,8 +1787,8 @@ def create_app(args):
             eval_time = last_chunk_time - first_chunk_time
 
             return {
-                "model": LIGHTRAG_MODEL,
-                "created_at": LIGHTRAG_CREATED_AT,
+                "model": ollama_server_infos.LIGHTRAG_MODEL,
+                "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                 "message": {
                     "role": "assistant",
                     "content": str(response_text),
@@ -1815,10 +1831,10 @@ def create_app(args):
             "embedding_binding_host": args.embedding_binding_host,
             "embedding_model": args.embedding_model,
             "max_tokens": args.max_tokens,
-            "kv_storage": KV_STORAGE,
-            "doc_status_storage": DOC_STATUS_STORAGE,
-            "graph_storage": GRAPH_STORAGE,
-            "vector_storage": VECTOR_STORAGE,
+            "kv_storage": ollama_server_infos.KV_STORAGE,
+            "doc_status_storage": ollama_server_infos.DOC_STATUS_STORAGE,
+            "graph_storage": ollama_server_infos.GRAPH_STORAGE,
+            "vector_storage": ollama_server_infos.VECTOR_STORAGE,
         },
     }
 
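
The heart of the change is replacing the module-level constants with a mutable OllamaServerInfos instance that parse_args() updates after parsing. A self-contained sketch of that flow (the argparse wiring mirrors the diff, but os.getenv stands in for the server's get_env_value helper):

    # Mutable settings object: the CLI can override the emulated model name.
    import argparse
    import os


    class OllamaServerInfos:
        LIGHTRAG_NAME = "lightrag"
        LIGHTRAG_TAG = os.getenv("OLLAMA_EMULATING_MODEL_TAG", "latest")
        LIGHTRAG_MODEL = f"{LIGHTRAG_NAME}:{LIGHTRAG_TAG}"


    ollama_server_infos = OllamaServerInfos()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--simulated-model-name",
        type=str,
        default=os.getenv("SIMULATED_MODEL_NAME", ollama_server_infos.LIGHTRAG_MODEL),
    )
    args = parser.parse_args(["--simulated-model-name", "my-model:latest"])

    # Assigning on the instance shadows the class attribute, exactly as the diff does.
    ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
    print(ollama_server_infos.LIGHTRAG_MODEL)  # my-model:latest

Because the response payloads now read ollama_server_infos.LIGHTRAG_MODEL at request time instead of a constant frozen at import, the rename propagates to the emulated tags, generate, and chat endpoints without further plumbing.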