LarFii committed on
Commit
9916565
·
1 Parent(s): 197300a
Dockerfile CHANGED
@@ -53,4 +53,4 @@ VOLUME /data /logs
53
  EXPOSE 7474 7473 7687
54
 
55
  ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"]
56
- CMD ["neo4j"]
 
53
  EXPOSE 7474 7473 7687
54
 
55
  ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"]
56
+ CMD ["neo4j"]
README.md CHANGED
@@ -196,7 +196,7 @@ rag = LightRAG(
196
  ### Using Neo4J for Storage
197
 
198
  * For production level scenarios you will most likely want to leverage an enterprise solution
199
- * for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
200
  * See: https://hub.docker.com/_/neo4j
201
 
202
 
@@ -209,7 +209,7 @@ When you launch the project be sure to override the default KG: NetworkS
209
  by specifying kg="Neo4JStorage".
210
 
211
  # Note: Default settings use NetworkX
212
- #Initialize LightRAG with Neo4J implementation.
213
  WORKING_DIR = "./local_neo4jWorkDir"
214
 
215
  rag = LightRAG(
@@ -503,8 +503,8 @@ pip install fastapi uvicorn pydantic
503
  export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
504
  export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
505
  export OPENAI_API_KEY="Your OpenAI API key" # Required
506
- export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
507
- export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
508
  ```
509
 
510
  3. Run the API server:
@@ -923,4 +923,3 @@ primaryClass={cs.IR}
923
  }
924
  ```
925
  **Thank you for your interest in our work!**
926
-
 
196
  ### Using Neo4J for Storage
197
 
198
  * For production level scenarios you will most likely want to leverage an enterprise solution
199
+ * for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
200
  * See: https://hub.docker.com/_/neo4j
201
 
202
 
 
209
  by specifying kg="Neo4JStorage".
210
 
211
  # Note: Default settings use NetworkX
212
+ #Initialize LightRAG with Neo4J implementation.
213
  WORKING_DIR = "./local_neo4jWorkDir"
214
 
215
  rag = LightRAG(
 
503
  export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
504
  export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
505
  export OPENAI_API_KEY="Your OpenAI API key" # Required
506
+ export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
507
+ export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
508
  ```
509
 
510
  3. Run the API server:
 
923
  }
924
  ```
925
  **Thank you for your interest in our work!**
 
examples/lightrag_api_openai_compatible_demo.py CHANGED
@@ -33,7 +33,7 @@ if not os.path.exists(WORKING_DIR):
33
 
34
 
35
  async def llm_model_func(
36
- prompt, system_prompt=None, history_messages=[], **kwargs
37
  ) -> str:
38
  return await openai_complete_if_cache(
39
  LLM_MODEL,
@@ -66,9 +66,11 @@ async def get_embedding_dim():
66
  rag = LightRAG(
67
  working_dir=WORKING_DIR,
68
  llm_model_func=llm_model_func,
69
- embedding_func=EmbeddingFunc(embedding_dim=asyncio.run(get_embedding_dim()),
70
- max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
71
- func=embedding_func),
 
 
72
  )
73
 
74
 
@@ -99,8 +101,13 @@ async def query_endpoint(request: QueryRequest):
99
  try:
100
  loop = asyncio.get_event_loop()
101
  result = await loop.run_in_executor(
102
- None, lambda: rag.query(request.query,
103
- param=QueryParam(mode=request.mode, only_need_context=request.only_need_context))
 
 
 
 
 
104
  )
105
  return Response(status="success", data=result)
106
  except Exception as e:
 
33
 
34
 
35
  async def llm_model_func(
36
+ prompt, system_prompt=None, history_messages=[], **kwargs
37
  ) -> str:
38
  return await openai_complete_if_cache(
39
  LLM_MODEL,
 
66
  rag = LightRAG(
67
  working_dir=WORKING_DIR,
68
  llm_model_func=llm_model_func,
69
+ embedding_func=EmbeddingFunc(
70
+ embedding_dim=asyncio.run(get_embedding_dim()),
71
+ max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
72
+ func=embedding_func,
73
+ ),
74
  )
75
 
76
 
 
101
  try:
102
  loop = asyncio.get_event_loop()
103
  result = await loop.run_in_executor(
104
+ None,
105
+ lambda: rag.query(
106
+ request.query,
107
+ param=QueryParam(
108
+ mode=request.mode, only_need_context=request.only_need_context
109
+ ),
110
+ ),
111
  )
112
  return Response(status="success", data=result)
113
  except Exception as e:
lightrag/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
2
 
3
- __version__ = "0.0.8"
4
  __author__ = "Zirui Guo"
5
  __url__ = "https://github.com/HKUDS/LightRAG"
 
1
  from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
2
 
3
+ __version__ = "0.0.9"
4
  __author__ = "Zirui Guo"
5
  __url__ = "https://github.com/HKUDS/LightRAG"
lightrag/kg/__init__.py CHANGED
@@ -1,3 +1 @@
1
  # print ("init package vars here. ......")
2
-
3
-
 
1
  # print ("init package vars here. ......")
 
 
lightrag/kg/neo4j_impl.py CHANGED
@@ -146,11 +146,11 @@ class Neo4JStorage(BaseGraphStorage):
146
  entity_name_label_target = target_node_id.strip('"')
147
  """
148
  Find all edges between nodes of two given labels
149
-
150
  Args:
151
  source_node_label (str): Label of the source nodes
152
  target_node_label (str): Label of the target nodes
153
-
154
  Returns:
155
  list: List of all relationships/edges found
156
  """
 
146
  entity_name_label_target = target_node_id.strip('"')
147
  """
148
  Find all edges between nodes of two given labels
149
+
150
  Args:
151
  source_node_label (str): Label of the source nodes
152
  target_node_label (str): Label of the target nodes
153
+
154
  Returns:
155
  list: List of all relationships/edges found
156
  """
lightrag/lightrag.py CHANGED
@@ -61,7 +61,6 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
61
  return loop
62
 
63
 
64
-
65
  @dataclass
66
  class LightRAG:
67
  working_dir: str = field(
 
61
  return loop
62
 
63
 
 
64
  @dataclass
65
  class LightRAG:
66
  working_dir: str = field(
lightrag/operate.py CHANGED
@@ -560,19 +560,19 @@ async def _find_most_related_text_unit_from_entities(
560
  if not this_edges:
561
  continue
562
  all_one_hop_nodes.update([e[1] for e in this_edges])
563
-
564
  all_one_hop_nodes = list(all_one_hop_nodes)
565
  all_one_hop_nodes_data = await asyncio.gather(
566
  *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
567
  )
568
-
569
  # Add null check for node data
570
  all_one_hop_text_units_lookup = {
571
  k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
572
  for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
573
  if v is not None and "source_id" in v # Add source_id check
574
  }
575
-
576
  all_text_units_lookup = {}
577
  for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
578
  for c_id in this_text_units:
@@ -586,7 +586,7 @@ async def _find_most_related_text_unit_from_entities(
586
  and c_id in all_one_hop_text_units_lookup[e[1]]
587
  ):
588
  relation_counts += 1
589
-
590
  chunk_data = await text_chunks_db.get_by_id(c_id)
591
  if chunk_data is not None and "content" in chunk_data: # Add content check
592
  all_text_units_lookup[c_id] = {
@@ -594,29 +594,28 @@ async def _find_most_related_text_unit_from_entities(
594
  "order": index,
595
  "relation_counts": relation_counts,
596
  }
597
-
598
  # Filter out None values and ensure data has content
599
  all_text_units = [
600
- {"id": k, **v}
601
- for k, v in all_text_units_lookup.items()
602
  if v is not None and v.get("data") is not None and "content" in v["data"]
603
  ]
604
-
605
  if not all_text_units:
606
  logger.warning("No valid text units found")
607
  return []
608
-
609
  all_text_units = sorted(
610
- all_text_units,
611
- key=lambda x: (x["order"], -x["relation_counts"])
612
  )
613
-
614
  all_text_units = truncate_list_by_token_size(
615
  all_text_units,
616
  key=lambda x: x["data"]["content"],
617
  max_token_size=query_param.max_token_for_text_unit,
618
  )
619
-
620
  all_text_units = [t["data"] for t in all_text_units]
621
  return all_text_units
622
 
 
560
  if not this_edges:
561
  continue
562
  all_one_hop_nodes.update([e[1] for e in this_edges])
563
+
564
  all_one_hop_nodes = list(all_one_hop_nodes)
565
  all_one_hop_nodes_data = await asyncio.gather(
566
  *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
567
  )
568
+
569
  # Add null check for node data
570
  all_one_hop_text_units_lookup = {
571
  k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
572
  for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
573
  if v is not None and "source_id" in v # Add source_id check
574
  }
575
+
576
  all_text_units_lookup = {}
577
  for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
578
  for c_id in this_text_units:
 
586
  and c_id in all_one_hop_text_units_lookup[e[1]]
587
  ):
588
  relation_counts += 1
589
+
590
  chunk_data = await text_chunks_db.get_by_id(c_id)
591
  if chunk_data is not None and "content" in chunk_data: # Add content check
592
  all_text_units_lookup[c_id] = {
 
594
  "order": index,
595
  "relation_counts": relation_counts,
596
  }
597
+
598
  # Filter out None values and ensure data has content
599
  all_text_units = [
600
+ {"id": k, **v}
601
+ for k, v in all_text_units_lookup.items()
602
  if v is not None and v.get("data") is not None and "content" in v["data"]
603
  ]
604
+
605
  if not all_text_units:
606
  logger.warning("No valid text units found")
607
  return []
608
+
609
  all_text_units = sorted(
610
+ all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
 
611
  )
612
+
613
  all_text_units = truncate_list_by_token_size(
614
  all_text_units,
615
  key=lambda x: x["data"]["content"],
616
  max_token_size=query_param.max_token_for_text_unit,
617
  )
618
+
619
  all_text_units = [t["data"] for t in all_text_units]
620
  return all_text_units
621
 
test.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  from lightrag import LightRAG, QueryParam
3
- from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
4
  #########
5
  # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
6
  # import nest_asyncio
 
1
  import os
2
  from lightrag import LightRAG, QueryParam
3
+ from lightrag.llm import gpt_4o_mini_complete
4
  #########
5
  # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
6
  # import nest_asyncio
test_neo4j.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  from lightrag import LightRAG, QueryParam
3
- from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
4
 
5
 
6
  #########
 
1
  import os
2
  from lightrag import LightRAG, QueryParam
3
+ from lightrag.llm import gpt_4o_mini_complete
4
 
5
 
6
  #########