cleanup code
Browse files- docker-compose.yml +0 -2
- examples/lightrag_api_oracle_demo.py +0 -1
- examples/lightrag_openai_compatible_stream_demo.py +0 -7
- examples/lightrag_tidb_demo.py +0 -1
- lightrag/lightrag.py +42 -79
- lightrag/utils.py +44 -0
- reproduce/Step_3.py +1 -9
- reproduce/Step_3_openai_compatible.py +2 -9
docker-compose.yml
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
version: '3.8'
|
2 |
-
|
3 |
services:
|
4 |
lightrag:
|
5 |
build: .
|
|
|
|
|
|
|
1 |
services:
|
2 |
lightrag:
|
3 |
build: .
|
examples/lightrag_api_oracle_demo.py
CHANGED
@@ -98,7 +98,6 @@ async def init():
|
|
98 |
|
99 |
# Initialize LightRAG
|
100 |
# We use Oracle DB as the KV/vector/graph storage
|
101 |
-
# You can add `addon_params={"example_number": 1, "language": "Simplfied Chinese"}` to control the prompt
|
102 |
rag = LightRAG(
|
103 |
enable_llm_cache=False,
|
104 |
working_dir=WORKING_DIR,
|
|
|
98 |
|
99 |
# Initialize LightRAG
|
100 |
# We use Oracle DB as the KV/vector/graph storage
|
|
|
101 |
rag = LightRAG(
|
102 |
enable_llm_cache=False,
|
103 |
working_dir=WORKING_DIR,
|
examples/lightrag_openai_compatible_stream_demo.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
import os
|
2 |
-
import inspect
|
3 |
from lightrag import LightRAG
|
4 |
from lightrag.llm import openai_complete, openai_embed
|
5 |
from lightrag.utils import EmbeddingFunc
|
6 |
-
from lightrag.lightrag import always_get_an_event_loop
|
7 |
from lightrag import QueryParam
|
8 |
|
9 |
# WorkingDir
|
@@ -48,8 +46,3 @@ async def print_stream(stream):
|
|
48 |
print(chunk, end="", flush=True)
|
49 |
|
50 |
|
51 |
-
loop = always_get_an_event_loop()
|
52 |
-
if inspect.isasyncgen(resp):
|
53 |
-
loop.run_until_complete(print_stream(resp))
|
54 |
-
else:
|
55 |
-
print(resp)
|
|
|
1 |
import os
|
|
|
2 |
from lightrag import LightRAG
|
3 |
from lightrag.llm import openai_complete, openai_embed
|
4 |
from lightrag.utils import EmbeddingFunc
|
|
|
5 |
from lightrag import QueryParam
|
6 |
|
7 |
# WorkingDir
|
|
|
46 |
print(chunk, end="", flush=True)
|
47 |
|
48 |
|
|
|
|
|
|
|
|
|
|
examples/lightrag_tidb_demo.py
CHANGED
@@ -63,7 +63,6 @@ async def main():
|
|
63 |
|
64 |
# Initialize LightRAG
|
65 |
# We use TiDB DB as the KV/vector
|
66 |
-
# You can add `addon_params={"example_number": 1, "language": "Simplfied Chinese"}` to control the prompt
|
67 |
rag = LightRAG(
|
68 |
enable_llm_cache=False,
|
69 |
working_dir=WORKING_DIR,
|
|
|
63 |
|
64 |
# Initialize LightRAG
|
65 |
# We use TiDB DB as the KV/vector
|
|
|
66 |
rag = LightRAG(
|
67 |
enable_llm_cache=False,
|
68 |
working_dir=WORKING_DIR,
|
lightrag/lightrag.py
CHANGED
@@ -32,8 +32,10 @@ from .operate import (
|
|
32 |
from .prompt import GRAPH_FIELD_SEP
|
33 |
from .utils import (
|
34 |
EmbeddingFunc,
|
|
|
35 |
compute_mdhash_id,
|
36 |
convert_response_to_json,
|
|
|
37 |
limit_async_func_call,
|
38 |
logger,
|
39 |
set_logger,
|
@@ -182,48 +184,9 @@ STORAGES = {
|
|
182 |
}
|
183 |
|
184 |
|
185 |
-
def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any]:
|
186 |
-
"""Lazily import a class from an external module based on the package of the caller."""
|
187 |
-
# Get the caller's module and package
|
188 |
-
import inspect
|
189 |
|
190 |
-
caller_frame = inspect.currentframe().f_back
|
191 |
-
module = inspect.getmodule(caller_frame)
|
192 |
-
package = module.__package__ if module else None
|
193 |
|
194 |
-
def import_class(*args: Any, **kwargs: Any):
|
195 |
-
import importlib
|
196 |
|
197 |
-
module = importlib.import_module(module_name, package=package)
|
198 |
-
cls = getattr(module, class_name)
|
199 |
-
return cls(*args, **kwargs)
|
200 |
-
|
201 |
-
return import_class
|
202 |
-
|
203 |
-
|
204 |
-
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
|
205 |
-
"""
|
206 |
-
Ensure that there is always an event loop available.
|
207 |
-
|
208 |
-
This function tries to get the current event loop. If the current event loop is closed or does not exist,
|
209 |
-
it creates a new event loop and sets it as the current event loop.
|
210 |
-
|
211 |
-
Returns:
|
212 |
-
asyncio.AbstractEventLoop: The current or newly created event loop.
|
213 |
-
"""
|
214 |
-
try:
|
215 |
-
# Try to get the current event loop
|
216 |
-
current_loop = asyncio.get_event_loop()
|
217 |
-
if current_loop.is_closed():
|
218 |
-
raise RuntimeError("Event loop is closed.")
|
219 |
-
return current_loop
|
220 |
-
|
221 |
-
except RuntimeError:
|
222 |
-
# If no event loop exists or it is closed, create a new one
|
223 |
-
logger.info("Creating a new event loop in main thread.")
|
224 |
-
new_loop = asyncio.new_event_loop()
|
225 |
-
asyncio.set_event_loop(new_loop)
|
226 |
-
return new_loop
|
227 |
|
228 |
|
229 |
@final
|
@@ -428,46 +391,6 @@ class LightRAG:
|
|
428 |
The default function is :func:`.utils.convert_response_to_json`.
|
429 |
"""
|
430 |
|
431 |
-
def verify_storage_implementation(
|
432 |
-
self, storage_type: str, storage_name: str
|
433 |
-
) -> None:
|
434 |
-
"""Verify if storage implementation is compatible with specified storage type
|
435 |
-
|
436 |
-
Args:
|
437 |
-
storage_type: Storage type (KV_STORAGE, GRAPH_STORAGE etc.)
|
438 |
-
storage_name: Storage implementation name
|
439 |
-
|
440 |
-
Raises:
|
441 |
-
ValueError: If storage implementation is incompatible or missing required methods
|
442 |
-
"""
|
443 |
-
if storage_type not in STORAGE_IMPLEMENTATIONS:
|
444 |
-
raise ValueError(f"Unknown storage type: {storage_type}")
|
445 |
-
|
446 |
-
storage_info = STORAGE_IMPLEMENTATIONS[storage_type]
|
447 |
-
if storage_name not in storage_info["implementations"]:
|
448 |
-
raise ValueError(
|
449 |
-
f"Storage implementation '{storage_name}' is not compatible with {storage_type}. "
|
450 |
-
f"Compatible implementations are: {', '.join(storage_info['implementations'])}"
|
451 |
-
)
|
452 |
-
|
453 |
-
def check_storage_env_vars(self, storage_name: str) -> None:
|
454 |
-
"""Check if all required environment variables for storage implementation exist
|
455 |
-
|
456 |
-
Args:
|
457 |
-
storage_name: Storage implementation name
|
458 |
-
|
459 |
-
Raises:
|
460 |
-
ValueError: If required environment variables are missing
|
461 |
-
"""
|
462 |
-
required_vars = STORAGE_ENV_REQUIREMENTS.get(storage_name, [])
|
463 |
-
missing_vars = [var for var in required_vars if var not in os.environ]
|
464 |
-
|
465 |
-
if missing_vars:
|
466 |
-
raise ValueError(
|
467 |
-
f"Storage implementation '{storage_name}' requires the following "
|
468 |
-
f"environment variables: {', '.join(missing_vars)}"
|
469 |
-
)
|
470 |
-
|
471 |
def __post_init__(self):
|
472 |
os.makedirs(self.log_dir, exist_ok=True)
|
473 |
log_file = os.path.join(self.log_dir, "lightrag.log")
|
@@ -1681,3 +1604,43 @@ class LightRAG:
|
|
1681 |
result["vector_data"] = vector_data[0] if vector_data else None
|
1682 |
|
1683 |
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
from .prompt import GRAPH_FIELD_SEP
|
33 |
from .utils import (
|
34 |
EmbeddingFunc,
|
35 |
+
always_get_an_event_loop,
|
36 |
compute_mdhash_id,
|
37 |
convert_response_to_json,
|
38 |
+
lazy_external_import,
|
39 |
limit_async_func_call,
|
40 |
logger,
|
41 |
set_logger,
|
|
|
184 |
}
|
185 |
|
186 |
|
|
|
|
|
|
|
|
|
187 |
|
|
|
|
|
|
|
188 |
|
|
|
|
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
|
192 |
@final
|
|
|
391 |
The default function is :func:`.utils.convert_response_to_json`.
|
392 |
"""
|
393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
def __post_init__(self):
|
395 |
os.makedirs(self.log_dir, exist_ok=True)
|
396 |
log_file = os.path.join(self.log_dir, "lightrag.log")
|
|
|
1604 |
result["vector_data"] = vector_data[0] if vector_data else None
|
1605 |
|
1606 |
return result
|
1607 |
+
|
1608 |
+
def verify_storage_implementation(
|
1609 |
+
self, storage_type: str, storage_name: str
|
1610 |
+
) -> None:
|
1611 |
+
"""Verify if storage implementation is compatible with specified storage type
|
1612 |
+
|
1613 |
+
Args:
|
1614 |
+
storage_type: Storage type (KV_STORAGE, GRAPH_STORAGE etc.)
|
1615 |
+
storage_name: Storage implementation name
|
1616 |
+
|
1617 |
+
Raises:
|
1618 |
+
ValueError: If storage implementation is incompatible or missing required methods
|
1619 |
+
"""
|
1620 |
+
if storage_type not in STORAGE_IMPLEMENTATIONS:
|
1621 |
+
raise ValueError(f"Unknown storage type: {storage_type}")
|
1622 |
+
|
1623 |
+
storage_info = STORAGE_IMPLEMENTATIONS[storage_type]
|
1624 |
+
if storage_name not in storage_info["implementations"]:
|
1625 |
+
raise ValueError(
|
1626 |
+
f"Storage implementation '{storage_name}' is not compatible with {storage_type}. "
|
1627 |
+
f"Compatible implementations are: {', '.join(storage_info['implementations'])}"
|
1628 |
+
)
|
1629 |
+
|
1630 |
+
def check_storage_env_vars(self, storage_name: str) -> None:
|
1631 |
+
"""Check if all required environment variables for storage implementation exist
|
1632 |
+
|
1633 |
+
Args:
|
1634 |
+
storage_name: Storage implementation name
|
1635 |
+
|
1636 |
+
Raises:
|
1637 |
+
ValueError: If required environment variables are missing
|
1638 |
+
"""
|
1639 |
+
required_vars = STORAGE_ENV_REQUIREMENTS.get(storage_name, [])
|
1640 |
+
missing_vars = [var for var in required_vars if var not in os.environ]
|
1641 |
+
|
1642 |
+
if missing_vars:
|
1643 |
+
raise ValueError(
|
1644 |
+
f"Storage implementation '{storage_name}' requires the following "
|
1645 |
+
f"environment variables: {', '.join(missing_vars)}"
|
1646 |
+
)
|
lightrag/utils.py
CHANGED
@@ -713,3 +713,47 @@ def get_conversation_turns(
|
|
713 |
)
|
714 |
|
715 |
return "\n".join(formatted_turns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
713 |
)
|
714 |
|
715 |
return "\n".join(formatted_turns)
|
716 |
+
|
717 |
+
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
|
718 |
+
"""
|
719 |
+
Ensure that there is always an event loop available.
|
720 |
+
|
721 |
+
This function tries to get the current event loop. If the current event loop is closed or does not exist,
|
722 |
+
it creates a new event loop and sets it as the current event loop.
|
723 |
+
|
724 |
+
Returns:
|
725 |
+
asyncio.AbstractEventLoop: The current or newly created event loop.
|
726 |
+
"""
|
727 |
+
try:
|
728 |
+
# Try to get the current event loop
|
729 |
+
current_loop = asyncio.get_event_loop()
|
730 |
+
if current_loop.is_closed():
|
731 |
+
raise RuntimeError("Event loop is closed.")
|
732 |
+
return current_loop
|
733 |
+
|
734 |
+
except RuntimeError:
|
735 |
+
# If no event loop exists or it is closed, create a new one
|
736 |
+
logger.info("Creating a new event loop in main thread.")
|
737 |
+
new_loop = asyncio.new_event_loop()
|
738 |
+
asyncio.set_event_loop(new_loop)
|
739 |
+
return new_loop
|
740 |
+
|
741 |
+
|
742 |
+
def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any]:
|
743 |
+
"""Lazily import a class from an external module based on the package of the caller."""
|
744 |
+
# Get the caller's module and package
|
745 |
+
import inspect
|
746 |
+
|
747 |
+
caller_frame = inspect.currentframe().f_back
|
748 |
+
module = inspect.getmodule(caller_frame)
|
749 |
+
package = module.__package__ if module else None
|
750 |
+
|
751 |
+
def import_class(*args: Any, **kwargs: Any):
|
752 |
+
import importlib
|
753 |
+
|
754 |
+
module = importlib.import_module(module_name, package=package)
|
755 |
+
cls = getattr(module, class_name)
|
756 |
+
return cls(*args, **kwargs)
|
757 |
+
|
758 |
+
return import_class
|
759 |
+
|
reproduce/Step_3.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import re
|
2 |
import json
|
3 |
-
import asyncio
|
4 |
from lightrag import LightRAG, QueryParam
|
|
|
5 |
|
6 |
|
7 |
def extract_queries(file_path):
|
@@ -23,14 +23,6 @@ async def process_query(query_text, rag_instance, query_param):
|
|
23 |
return None, {"query": query_text, "error": str(e)}
|
24 |
|
25 |
|
26 |
-
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
|
27 |
-
try:
|
28 |
-
loop = asyncio.get_event_loop()
|
29 |
-
except RuntimeError:
|
30 |
-
loop = asyncio.new_event_loop()
|
31 |
-
asyncio.set_event_loop(loop)
|
32 |
-
return loop
|
33 |
-
|
34 |
|
35 |
def run_queries_and_save_to_json(
|
36 |
queries, rag_instance, query_param, output_file, error_file
|
|
|
1 |
import re
|
2 |
import json
|
|
|
3 |
from lightrag import LightRAG, QueryParam
|
4 |
+
from lightrag.utils import always_get_an_event_loop
|
5 |
|
6 |
|
7 |
def extract_queries(file_path):
|
|
|
23 |
return None, {"query": query_text, "error": str(e)}
|
24 |
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
def run_queries_and_save_to_json(
|
28 |
queries, rag_instance, query_param, output_file, error_file
|
reproduce/Step_3_openai_compatible.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
import json
|
4 |
-
import asyncio
|
5 |
from lightrag import LightRAG, QueryParam
|
6 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
7 |
-
from lightrag.utils import EmbeddingFunc
|
8 |
import numpy as np
|
9 |
|
10 |
|
@@ -55,13 +54,7 @@ async def process_query(query_text, rag_instance, query_param):
|
|
55 |
return None, {"query": query_text, "error": str(e)}
|
56 |
|
57 |
|
58 |
-
|
59 |
-
try:
|
60 |
-
loop = asyncio.get_event_loop()
|
61 |
-
except RuntimeError:
|
62 |
-
loop = asyncio.new_event_loop()
|
63 |
-
asyncio.set_event_loop(loop)
|
64 |
-
return loop
|
65 |
|
66 |
|
67 |
def run_queries_and_save_to_json(
|
|
|
1 |
import os
|
2 |
import re
|
3 |
import json
|
|
|
4 |
from lightrag import LightRAG, QueryParam
|
5 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
6 |
+
from lightrag.utils import EmbeddingFunc, always_get_an_event_loop
|
7 |
import numpy as np
|
8 |
|
9 |
|
|
|
54 |
return None, {"query": query_text, "error": str(e)}
|
55 |
|
56 |
|
57 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
|
60 |
def run_queries_and_save_to_json(
|