LarFii
committed on
Commit
·
9916565
1
Parent(s):
197300a
Linting
Browse files
- Dockerfile +1 -1
- README.md +4 -5
- examples/lightrag_api_openai_compatible_demo.py +13 -6
- lightrag/__init__.py +1 -1
- lightrag/kg/__init__.py +0 -2
- lightrag/kg/neo4j_impl.py +2 -2
- lightrag/lightrag.py +0 -1
- lightrag/operate.py +12 -13
- test.py +1 -1
- test_neo4j.py +1 -1
Dockerfile
CHANGED
@@ -53,4 +53,4 @@ VOLUME /data /logs
|
|
53 |
EXPOSE 7474 7473 7687
|
54 |
|
55 |
ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"]
|
56 |
-
CMD ["neo4j"]
|
|
|
53 |
EXPOSE 7474 7473 7687
|
54 |
|
55 |
ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"]
|
56 |
+
CMD ["neo4j"]
|
README.md
CHANGED
@@ -196,7 +196,7 @@ rag = LightRAG(
|
|
196 |
### Using Neo4J for Storage
|
197 |
|
198 |
* For production level scenarios you will most likely want to leverage an enterprise solution
|
199 |
-
* for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
|
200 |
* See: https://hub.docker.com/_/neo4j
|
201 |
|
202 |
|
@@ -209,7 +209,7 @@ When you launch the project be sure to override the default KG: NetworkS
|
|
209 |
by specifying kg="Neo4JStorage".
|
210 |
|
211 |
# Note: Default settings use NetworkX
|
212 |
-
#Initialize LightRAG with Neo4J implementation.
|
213 |
WORKING_DIR = "./local_neo4jWorkDir"
|
214 |
|
215 |
rag = LightRAG(
|
@@ -503,8 +503,8 @@ pip install fastapi uvicorn pydantic
|
|
503 |
export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
|
504 |
export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
|
505 |
export OPENAI_API_KEY="Your OpenAI API key" # Required
|
506 |
-
export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
|
507 |
-
export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
|
508 |
```
|
509 |
|
510 |
3. Run the API server:
|
@@ -923,4 +923,3 @@ primaryClass={cs.IR}
|
|
923 |
}
|
924 |
```
|
925 |
**Thank you for your interest in our work!**
|
926 |
-
|
|
|
196 |
### Using Neo4J for Storage
|
197 |
|
198 |
* For production level scenarios you will most likely want to leverage an enterprise solution
|
199 |
+
* for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
|
200 |
* See: https://hub.docker.com/_/neo4j
|
201 |
|
202 |
|
|
|
209 |
by specifying kg="Neo4JStorage".
|
210 |
|
211 |
# Note: Default settings use NetworkX
|
212 |
+
#Initialize LightRAG with Neo4J implementation.
|
213 |
WORKING_DIR = "./local_neo4jWorkDir"
|
214 |
|
215 |
rag = LightRAG(
|
|
|
503 |
export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
|
504 |
export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
|
505 |
export OPENAI_API_KEY="Your OpenAI API key" # Required
|
506 |
+
export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
|
507 |
+
export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
|
508 |
```
|
509 |
|
510 |
3. Run the API server:
|
|
|
923 |
}
|
924 |
```
|
925 |
**Thank you for your interest in our work!**
|
|
examples/lightrag_api_openai_compatible_demo.py
CHANGED
@@ -33,7 +33,7 @@ if not os.path.exists(WORKING_DIR):
|
|
33 |
|
34 |
|
35 |
async def llm_model_func(
|
36 |
-
|
37 |
) -> str:
|
38 |
return await openai_complete_if_cache(
|
39 |
LLM_MODEL,
|
@@ -66,9 +66,11 @@ async def get_embedding_dim():
|
|
66 |
rag = LightRAG(
|
67 |
working_dir=WORKING_DIR,
|
68 |
llm_model_func=llm_model_func,
|
69 |
-
embedding_func=EmbeddingFunc(
|
70 |
-
|
71 |
-
|
|
|
|
|
72 |
)
|
73 |
|
74 |
|
@@ -99,8 +101,13 @@ async def query_endpoint(request: QueryRequest):
|
|
99 |
try:
|
100 |
loop = asyncio.get_event_loop()
|
101 |
result = await loop.run_in_executor(
|
102 |
-
None,
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
104 |
)
|
105 |
return Response(status="success", data=result)
|
106 |
except Exception as e:
|
|
|
33 |
|
34 |
|
35 |
async def llm_model_func(
|
36 |
+
prompt, system_prompt=None, history_messages=[], **kwargs
|
37 |
) -> str:
|
38 |
return await openai_complete_if_cache(
|
39 |
LLM_MODEL,
|
|
|
66 |
rag = LightRAG(
|
67 |
working_dir=WORKING_DIR,
|
68 |
llm_model_func=llm_model_func,
|
69 |
+
embedding_func=EmbeddingFunc(
|
70 |
+
embedding_dim=asyncio.run(get_embedding_dim()),
|
71 |
+
max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
|
72 |
+
func=embedding_func,
|
73 |
+
),
|
74 |
)
|
75 |
|
76 |
|
|
|
101 |
try:
|
102 |
loop = asyncio.get_event_loop()
|
103 |
result = await loop.run_in_executor(
|
104 |
+
None,
|
105 |
+
lambda: rag.query(
|
106 |
+
request.query,
|
107 |
+
param=QueryParam(
|
108 |
+
mode=request.mode, only_need_context=request.only_need_context
|
109 |
+
),
|
110 |
+
),
|
111 |
)
|
112 |
return Response(status="success", data=result)
|
113 |
except Exception as e:
|
lightrag/__init__.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
2 |
|
3 |
-
__version__ = "0.0.
|
4 |
__author__ = "Zirui Guo"
|
5 |
__url__ = "https://github.com/HKUDS/LightRAG"
|
|
|
1 |
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
2 |
|
3 |
+
__version__ = "0.0.9"
|
4 |
__author__ = "Zirui Guo"
|
5 |
__url__ = "https://github.com/HKUDS/LightRAG"
|
lightrag/kg/__init__.py
CHANGED
@@ -1,3 +1 @@
|
|
1 |
# print ("init package vars here. ......")
|
2 |
-
|
3 |
-
|
|
|
1 |
# print ("init package vars here. ......")
|
|
|
|
lightrag/kg/neo4j_impl.py
CHANGED
@@ -146,11 +146,11 @@ class Neo4JStorage(BaseGraphStorage):
|
|
146 |
entity_name_label_target = target_node_id.strip('"')
|
147 |
"""
|
148 |
Find all edges between nodes of two given labels
|
149 |
-
|
150 |
Args:
|
151 |
source_node_label (str): Label of the source nodes
|
152 |
target_node_label (str): Label of the target nodes
|
153 |
-
|
154 |
Returns:
|
155 |
list: List of all relationships/edges found
|
156 |
"""
|
|
|
146 |
entity_name_label_target = target_node_id.strip('"')
|
147 |
"""
|
148 |
Find all edges between nodes of two given labels
|
149 |
+
|
150 |
Args:
|
151 |
source_node_label (str): Label of the source nodes
|
152 |
target_node_label (str): Label of the target nodes
|
153 |
+
|
154 |
Returns:
|
155 |
list: List of all relationships/edges found
|
156 |
"""
|
lightrag/lightrag.py
CHANGED
@@ -61,7 +61,6 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
|
|
61 |
return loop
|
62 |
|
63 |
|
64 |
-
|
65 |
@dataclass
|
66 |
class LightRAG:
|
67 |
working_dir: str = field(
|
|
|
61 |
return loop
|
62 |
|
63 |
|
|
|
64 |
@dataclass
|
65 |
class LightRAG:
|
66 |
working_dir: str = field(
|
lightrag/operate.py
CHANGED
@@ -560,19 +560,19 @@ async def _find_most_related_text_unit_from_entities(
|
|
560 |
if not this_edges:
|
561 |
continue
|
562 |
all_one_hop_nodes.update([e[1] for e in this_edges])
|
563 |
-
|
564 |
all_one_hop_nodes = list(all_one_hop_nodes)
|
565 |
all_one_hop_nodes_data = await asyncio.gather(
|
566 |
*[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
|
567 |
)
|
568 |
-
|
569 |
# Add null check for node data
|
570 |
all_one_hop_text_units_lookup = {
|
571 |
k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
|
572 |
for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
|
573 |
if v is not None and "source_id" in v # Add source_id check
|
574 |
}
|
575 |
-
|
576 |
all_text_units_lookup = {}
|
577 |
for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
|
578 |
for c_id in this_text_units:
|
@@ -586,7 +586,7 @@ async def _find_most_related_text_unit_from_entities(
|
|
586 |
and c_id in all_one_hop_text_units_lookup[e[1]]
|
587 |
):
|
588 |
relation_counts += 1
|
589 |
-
|
590 |
chunk_data = await text_chunks_db.get_by_id(c_id)
|
591 |
if chunk_data is not None and "content" in chunk_data: # Add content check
|
592 |
all_text_units_lookup[c_id] = {
|
@@ -594,29 +594,28 @@ async def _find_most_related_text_unit_from_entities(
|
|
594 |
"order": index,
|
595 |
"relation_counts": relation_counts,
|
596 |
}
|
597 |
-
|
598 |
# Filter out None values and ensure data has content
|
599 |
all_text_units = [
|
600 |
-
{"id": k, **v}
|
601 |
-
for k, v in all_text_units_lookup.items()
|
602 |
if v is not None and v.get("data") is not None and "content" in v["data"]
|
603 |
]
|
604 |
-
|
605 |
if not all_text_units:
|
606 |
logger.warning("No valid text units found")
|
607 |
return []
|
608 |
-
|
609 |
all_text_units = sorted(
|
610 |
-
all_text_units,
|
611 |
-
key=lambda x: (x["order"], -x["relation_counts"])
|
612 |
)
|
613 |
-
|
614 |
all_text_units = truncate_list_by_token_size(
|
615 |
all_text_units,
|
616 |
key=lambda x: x["data"]["content"],
|
617 |
max_token_size=query_param.max_token_for_text_unit,
|
618 |
)
|
619 |
-
|
620 |
all_text_units = [t["data"] for t in all_text_units]
|
621 |
return all_text_units
|
622 |
|
|
|
560 |
if not this_edges:
|
561 |
continue
|
562 |
all_one_hop_nodes.update([e[1] for e in this_edges])
|
563 |
+
|
564 |
all_one_hop_nodes = list(all_one_hop_nodes)
|
565 |
all_one_hop_nodes_data = await asyncio.gather(
|
566 |
*[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
|
567 |
)
|
568 |
+
|
569 |
# Add null check for node data
|
570 |
all_one_hop_text_units_lookup = {
|
571 |
k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
|
572 |
for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
|
573 |
if v is not None and "source_id" in v # Add source_id check
|
574 |
}
|
575 |
+
|
576 |
all_text_units_lookup = {}
|
577 |
for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
|
578 |
for c_id in this_text_units:
|
|
|
586 |
and c_id in all_one_hop_text_units_lookup[e[1]]
|
587 |
):
|
588 |
relation_counts += 1
|
589 |
+
|
590 |
chunk_data = await text_chunks_db.get_by_id(c_id)
|
591 |
if chunk_data is not None and "content" in chunk_data: # Add content check
|
592 |
all_text_units_lookup[c_id] = {
|
|
|
594 |
"order": index,
|
595 |
"relation_counts": relation_counts,
|
596 |
}
|
597 |
+
|
598 |
# Filter out None values and ensure data has content
|
599 |
all_text_units = [
|
600 |
+
{"id": k, **v}
|
601 |
+
for k, v in all_text_units_lookup.items()
|
602 |
if v is not None and v.get("data") is not None and "content" in v["data"]
|
603 |
]
|
604 |
+
|
605 |
if not all_text_units:
|
606 |
logger.warning("No valid text units found")
|
607 |
return []
|
608 |
+
|
609 |
all_text_units = sorted(
|
610 |
+
all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
|
|
|
611 |
)
|
612 |
+
|
613 |
all_text_units = truncate_list_by_token_size(
|
614 |
all_text_units,
|
615 |
key=lambda x: x["data"]["content"],
|
616 |
max_token_size=query_param.max_token_for_text_unit,
|
617 |
)
|
618 |
+
|
619 |
all_text_units = [t["data"] for t in all_text_units]
|
620 |
return all_text_units
|
621 |
|
test.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
from lightrag import LightRAG, QueryParam
|
3 |
-
from lightrag.llm import gpt_4o_mini_complete
|
4 |
#########
|
5 |
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
|
6 |
# import nest_asyncio
|
|
|
1 |
import os
|
2 |
from lightrag import LightRAG, QueryParam
|
3 |
+
from lightrag.llm import gpt_4o_mini_complete
|
4 |
#########
|
5 |
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
|
6 |
# import nest_asyncio
|
test_neo4j.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
from lightrag import LightRAG, QueryParam
|
3 |
-
from lightrag.llm import gpt_4o_mini_complete
|
4 |
|
5 |
|
6 |
#########
|
|
|
1 |
import os
|
2 |
from lightrag import LightRAG, QueryParam
|
3 |
+
from lightrag.llm import gpt_4o_mini_complete
|
4 |
|
5 |
|
6 |
#########
|