cleanup code
Browse files- docker-compose.yml +0 -2
- examples/lightrag_api_oracle_demo.py +0 -1
- examples/lightrag_openai_compatible_stream_demo.py +0 -7
- examples/lightrag_tidb_demo.py +0 -1
- lightrag/lightrag.py +42 -79
- lightrag/utils.py +44 -0
- reproduce/Step_3.py +1 -9
- reproduce/Step_3_openai_compatible.py +2 -9
docker-compose.yml
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
version: '3.8'
|
| 2 |
-
|
| 3 |
services:
|
| 4 |
lightrag:
|
| 5 |
build: .
|
|
|
|
|
|
|
|
|
|
| 1 |
services:
|
| 2 |
lightrag:
|
| 3 |
build: .
|
examples/lightrag_api_oracle_demo.py
CHANGED
|
@@ -98,7 +98,6 @@ async def init():
|
|
| 98 |
|
| 99 |
# Initialize LightRAG
|
| 100 |
# We use Oracle DB as the KV/vector/graph storage
|
| 101 |
-
# You can add `addon_params={"example_number": 1, "language": "Simplfied Chinese"}` to control the prompt
|
| 102 |
rag = LightRAG(
|
| 103 |
enable_llm_cache=False,
|
| 104 |
working_dir=WORKING_DIR,
|
|
|
|
| 98 |
|
| 99 |
# Initialize LightRAG
|
| 100 |
# We use Oracle DB as the KV/vector/graph storage
|
|
|
|
| 101 |
rag = LightRAG(
|
| 102 |
enable_llm_cache=False,
|
| 103 |
working_dir=WORKING_DIR,
|
examples/lightrag_openai_compatible_stream_demo.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
-
import inspect
|
| 3 |
from lightrag import LightRAG
|
| 4 |
from lightrag.llm import openai_complete, openai_embed
|
| 5 |
from lightrag.utils import EmbeddingFunc
|
| 6 |
-
from lightrag.lightrag import always_get_an_event_loop
|
| 7 |
from lightrag import QueryParam
|
| 8 |
|
| 9 |
# WorkingDir
|
|
@@ -48,8 +46,3 @@ async def print_stream(stream):
|
|
| 48 |
print(chunk, end="", flush=True)
|
| 49 |
|
| 50 |
|
| 51 |
-
loop = always_get_an_event_loop()
|
| 52 |
-
if inspect.isasyncgen(resp):
|
| 53 |
-
loop.run_until_complete(print_stream(resp))
|
| 54 |
-
else:
|
| 55 |
-
print(resp)
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
from lightrag import LightRAG
|
| 3 |
from lightrag.llm import openai_complete, openai_embed
|
| 4 |
from lightrag.utils import EmbeddingFunc
|
|
|
|
| 5 |
from lightrag import QueryParam
|
| 6 |
|
| 7 |
# WorkingDir
|
|
|
|
| 46 |
print(chunk, end="", flush=True)
|
| 47 |
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/lightrag_tidb_demo.py
CHANGED
|
@@ -63,7 +63,6 @@ async def main():
|
|
| 63 |
|
| 64 |
# Initialize LightRAG
|
| 65 |
# We use TiDB DB as the KV/vector
|
| 66 |
-
# You can add `addon_params={"example_number": 1, "language": "Simplfied Chinese"}` to control the prompt
|
| 67 |
rag = LightRAG(
|
| 68 |
enable_llm_cache=False,
|
| 69 |
working_dir=WORKING_DIR,
|
|
|
|
| 63 |
|
| 64 |
# Initialize LightRAG
|
| 65 |
# We use TiDB DB as the KV/vector
|
|
|
|
| 66 |
rag = LightRAG(
|
| 67 |
enable_llm_cache=False,
|
| 68 |
working_dir=WORKING_DIR,
|
lightrag/lightrag.py
CHANGED
|
@@ -32,8 +32,10 @@ from .operate import (
|
|
| 32 |
from .prompt import GRAPH_FIELD_SEP
|
| 33 |
from .utils import (
|
| 34 |
EmbeddingFunc,
|
|
|
|
| 35 |
compute_mdhash_id,
|
| 36 |
convert_response_to_json,
|
|
|
|
| 37 |
limit_async_func_call,
|
| 38 |
logger,
|
| 39 |
set_logger,
|
|
@@ -182,48 +184,9 @@ STORAGES = {
|
|
| 182 |
}
|
| 183 |
|
| 184 |
|
| 185 |
-
def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any]:
|
| 186 |
-
"""Lazily import a class from an external module based on the package of the caller."""
|
| 187 |
-
# Get the caller's module and package
|
| 188 |
-
import inspect
|
| 189 |
|
| 190 |
-
caller_frame = inspect.currentframe().f_back
|
| 191 |
-
module = inspect.getmodule(caller_frame)
|
| 192 |
-
package = module.__package__ if module else None
|
| 193 |
|
| 194 |
-
def import_class(*args: Any, **kwargs: Any):
|
| 195 |
-
import importlib
|
| 196 |
|
| 197 |
-
module = importlib.import_module(module_name, package=package)
|
| 198 |
-
cls = getattr(module, class_name)
|
| 199 |
-
return cls(*args, **kwargs)
|
| 200 |
-
|
| 201 |
-
return import_class
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
|
| 205 |
-
"""
|
| 206 |
-
Ensure that there is always an event loop available.
|
| 207 |
-
|
| 208 |
-
This function tries to get the current event loop. If the current event loop is closed or does not exist,
|
| 209 |
-
it creates a new event loop and sets it as the current event loop.
|
| 210 |
-
|
| 211 |
-
Returns:
|
| 212 |
-
asyncio.AbstractEventLoop: The current or newly created event loop.
|
| 213 |
-
"""
|
| 214 |
-
try:
|
| 215 |
-
# Try to get the current event loop
|
| 216 |
-
current_loop = asyncio.get_event_loop()
|
| 217 |
-
if current_loop.is_closed():
|
| 218 |
-
raise RuntimeError("Event loop is closed.")
|
| 219 |
-
return current_loop
|
| 220 |
-
|
| 221 |
-
except RuntimeError:
|
| 222 |
-
# If no event loop exists or it is closed, create a new one
|
| 223 |
-
logger.info("Creating a new event loop in main thread.")
|
| 224 |
-
new_loop = asyncio.new_event_loop()
|
| 225 |
-
asyncio.set_event_loop(new_loop)
|
| 226 |
-
return new_loop
|
| 227 |
|
| 228 |
|
| 229 |
@final
|
|
@@ -428,46 +391,6 @@ class LightRAG:
|
|
| 428 |
The default function is :func:`.utils.convert_response_to_json`.
|
| 429 |
"""
|
| 430 |
|
| 431 |
-
def verify_storage_implementation(
|
| 432 |
-
self, storage_type: str, storage_name: str
|
| 433 |
-
) -> None:
|
| 434 |
-
"""Verify if storage implementation is compatible with specified storage type
|
| 435 |
-
|
| 436 |
-
Args:
|
| 437 |
-
storage_type: Storage type (KV_STORAGE, GRAPH_STORAGE etc.)
|
| 438 |
-
storage_name: Storage implementation name
|
| 439 |
-
|
| 440 |
-
Raises:
|
| 441 |
-
ValueError: If storage implementation is incompatible or missing required methods
|
| 442 |
-
"""
|
| 443 |
-
if storage_type not in STORAGE_IMPLEMENTATIONS:
|
| 444 |
-
raise ValueError(f"Unknown storage type: {storage_type}")
|
| 445 |
-
|
| 446 |
-
storage_info = STORAGE_IMPLEMENTATIONS[storage_type]
|
| 447 |
-
if storage_name not in storage_info["implementations"]:
|
| 448 |
-
raise ValueError(
|
| 449 |
-
f"Storage implementation '{storage_name}' is not compatible with {storage_type}. "
|
| 450 |
-
f"Compatible implementations are: {', '.join(storage_info['implementations'])}"
|
| 451 |
-
)
|
| 452 |
-
|
| 453 |
-
def check_storage_env_vars(self, storage_name: str) -> None:
|
| 454 |
-
"""Check if all required environment variables for storage implementation exist
|
| 455 |
-
|
| 456 |
-
Args:
|
| 457 |
-
storage_name: Storage implementation name
|
| 458 |
-
|
| 459 |
-
Raises:
|
| 460 |
-
ValueError: If required environment variables are missing
|
| 461 |
-
"""
|
| 462 |
-
required_vars = STORAGE_ENV_REQUIREMENTS.get(storage_name, [])
|
| 463 |
-
missing_vars = [var for var in required_vars if var not in os.environ]
|
| 464 |
-
|
| 465 |
-
if missing_vars:
|
| 466 |
-
raise ValueError(
|
| 467 |
-
f"Storage implementation '{storage_name}' requires the following "
|
| 468 |
-
f"environment variables: {', '.join(missing_vars)}"
|
| 469 |
-
)
|
| 470 |
-
|
| 471 |
def __post_init__(self):
|
| 472 |
os.makedirs(self.log_dir, exist_ok=True)
|
| 473 |
log_file = os.path.join(self.log_dir, "lightrag.log")
|
|
@@ -1681,3 +1604,43 @@ class LightRAG:
|
|
| 1681 |
result["vector_data"] = vector_data[0] if vector_data else None
|
| 1682 |
|
| 1683 |
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
from .prompt import GRAPH_FIELD_SEP
|
| 33 |
from .utils import (
|
| 34 |
EmbeddingFunc,
|
| 35 |
+
always_get_an_event_loop,
|
| 36 |
compute_mdhash_id,
|
| 37 |
convert_response_to_json,
|
| 38 |
+
lazy_external_import,
|
| 39 |
limit_async_func_call,
|
| 40 |
logger,
|
| 41 |
set_logger,
|
|
|
|
| 184 |
}
|
| 185 |
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
|
|
|
|
|
|
|
|
|
| 188 |
|
|
|
|
|
|
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
|
| 192 |
@final
|
|
|
|
| 391 |
The default function is :func:`.utils.convert_response_to_json`.
|
| 392 |
"""
|
| 393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
def __post_init__(self):
|
| 395 |
os.makedirs(self.log_dir, exist_ok=True)
|
| 396 |
log_file = os.path.join(self.log_dir, "lightrag.log")
|
|
|
|
| 1604 |
result["vector_data"] = vector_data[0] if vector_data else None
|
| 1605 |
|
| 1606 |
return result
|
| 1607 |
+
|
| 1608 |
+
def verify_storage_implementation(
|
| 1609 |
+
self, storage_type: str, storage_name: str
|
| 1610 |
+
) -> None:
|
| 1611 |
+
"""Verify if storage implementation is compatible with specified storage type
|
| 1612 |
+
|
| 1613 |
+
Args:
|
| 1614 |
+
storage_type: Storage type (KV_STORAGE, GRAPH_STORAGE etc.)
|
| 1615 |
+
storage_name: Storage implementation name
|
| 1616 |
+
|
| 1617 |
+
Raises:
|
| 1618 |
+
ValueError: If storage implementation is incompatible or missing required methods
|
| 1619 |
+
"""
|
| 1620 |
+
if storage_type not in STORAGE_IMPLEMENTATIONS:
|
| 1621 |
+
raise ValueError(f"Unknown storage type: {storage_type}")
|
| 1622 |
+
|
| 1623 |
+
storage_info = STORAGE_IMPLEMENTATIONS[storage_type]
|
| 1624 |
+
if storage_name not in storage_info["implementations"]:
|
| 1625 |
+
raise ValueError(
|
| 1626 |
+
f"Storage implementation '{storage_name}' is not compatible with {storage_type}. "
|
| 1627 |
+
f"Compatible implementations are: {', '.join(storage_info['implementations'])}"
|
| 1628 |
+
)
|
| 1629 |
+
|
| 1630 |
+
def check_storage_env_vars(self, storage_name: str) -> None:
|
| 1631 |
+
"""Check if all required environment variables for storage implementation exist
|
| 1632 |
+
|
| 1633 |
+
Args:
|
| 1634 |
+
storage_name: Storage implementation name
|
| 1635 |
+
|
| 1636 |
+
Raises:
|
| 1637 |
+
ValueError: If required environment variables are missing
|
| 1638 |
+
"""
|
| 1639 |
+
required_vars = STORAGE_ENV_REQUIREMENTS.get(storage_name, [])
|
| 1640 |
+
missing_vars = [var for var in required_vars if var not in os.environ]
|
| 1641 |
+
|
| 1642 |
+
if missing_vars:
|
| 1643 |
+
raise ValueError(
|
| 1644 |
+
f"Storage implementation '{storage_name}' requires the following "
|
| 1645 |
+
f"environment variables: {', '.join(missing_vars)}"
|
| 1646 |
+
)
|
lightrag/utils.py
CHANGED
|
@@ -713,3 +713,47 @@ def get_conversation_turns(
|
|
| 713 |
)
|
| 714 |
|
| 715 |
return "\n".join(formatted_turns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
)
|
| 714 |
|
| 715 |
return "\n".join(formatted_turns)
|
| 716 |
+
|
| 717 |
+
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
|
| 718 |
+
"""
|
| 719 |
+
Ensure that there is always an event loop available.
|
| 720 |
+
|
| 721 |
+
This function tries to get the current event loop. If the current event loop is closed or does not exist,
|
| 722 |
+
it creates a new event loop and sets it as the current event loop.
|
| 723 |
+
|
| 724 |
+
Returns:
|
| 725 |
+
asyncio.AbstractEventLoop: The current or newly created event loop.
|
| 726 |
+
"""
|
| 727 |
+
try:
|
| 728 |
+
# Try to get the current event loop
|
| 729 |
+
current_loop = asyncio.get_event_loop()
|
| 730 |
+
if current_loop.is_closed():
|
| 731 |
+
raise RuntimeError("Event loop is closed.")
|
| 732 |
+
return current_loop
|
| 733 |
+
|
| 734 |
+
except RuntimeError:
|
| 735 |
+
# If no event loop exists or it is closed, create a new one
|
| 736 |
+
logger.info("Creating a new event loop in main thread.")
|
| 737 |
+
new_loop = asyncio.new_event_loop()
|
| 738 |
+
asyncio.set_event_loop(new_loop)
|
| 739 |
+
return new_loop
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any]:
|
| 743 |
+
"""Lazily import a class from an external module based on the package of the caller."""
|
| 744 |
+
# Get the caller's module and package
|
| 745 |
+
import inspect
|
| 746 |
+
|
| 747 |
+
caller_frame = inspect.currentframe().f_back
|
| 748 |
+
module = inspect.getmodule(caller_frame)
|
| 749 |
+
package = module.__package__ if module else None
|
| 750 |
+
|
| 751 |
+
def import_class(*args: Any, **kwargs: Any):
|
| 752 |
+
import importlib
|
| 753 |
+
|
| 754 |
+
module = importlib.import_module(module_name, package=package)
|
| 755 |
+
cls = getattr(module, class_name)
|
| 756 |
+
return cls(*args, **kwargs)
|
| 757 |
+
|
| 758 |
+
return import_class
|
| 759 |
+
|
reproduce/Step_3.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import re
|
| 2 |
import json
|
| 3 |
-
import asyncio
|
| 4 |
from lightrag import LightRAG, QueryParam
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
def extract_queries(file_path):
|
|
@@ -23,14 +23,6 @@ async def process_query(query_text, rag_instance, query_param):
|
|
| 23 |
return None, {"query": query_text, "error": str(e)}
|
| 24 |
|
| 25 |
|
| 26 |
-
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
|
| 27 |
-
try:
|
| 28 |
-
loop = asyncio.get_event_loop()
|
| 29 |
-
except RuntimeError:
|
| 30 |
-
loop = asyncio.new_event_loop()
|
| 31 |
-
asyncio.set_event_loop(loop)
|
| 32 |
-
return loop
|
| 33 |
-
|
| 34 |
|
| 35 |
def run_queries_and_save_to_json(
|
| 36 |
queries, rag_instance, query_param, output_file, error_file
|
|
|
|
| 1 |
import re
|
| 2 |
import json
|
|
|
|
| 3 |
from lightrag import LightRAG, QueryParam
|
| 4 |
+
from lightrag.utils import always_get_an_event_loop
|
| 5 |
|
| 6 |
|
| 7 |
def extract_queries(file_path):
|
|
|
|
| 23 |
return None, {"query": query_text, "error": str(e)}
|
| 24 |
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
def run_queries_and_save_to_json(
|
| 28 |
queries, rag_instance, query_param, output_file, error_file
|
reproduce/Step_3_openai_compatible.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
-
import asyncio
|
| 5 |
from lightrag import LightRAG, QueryParam
|
| 6 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
| 7 |
-
from lightrag.utils import EmbeddingFunc
|
| 8 |
import numpy as np
|
| 9 |
|
| 10 |
|
|
@@ -55,13 +54,7 @@ async def process_query(query_text, rag_instance, query_param):
|
|
| 55 |
return None, {"query": query_text, "error": str(e)}
|
| 56 |
|
| 57 |
|
| 58 |
-
|
| 59 |
-
try:
|
| 60 |
-
loop = asyncio.get_event_loop()
|
| 61 |
-
except RuntimeError:
|
| 62 |
-
loop = asyncio.new_event_loop()
|
| 63 |
-
asyncio.set_event_loop(loop)
|
| 64 |
-
return loop
|
| 65 |
|
| 66 |
|
| 67 |
def run_queries_and_save_to_json(
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
|
|
|
| 4 |
from lightrag import LightRAG, QueryParam
|
| 5 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
| 6 |
+
from lightrag.utils import EmbeddingFunc, always_get_an_event_loop
|
| 7 |
import numpy as np
|
| 8 |
|
| 9 |
|
|
|
|
| 54 |
return None, {"query": query_text, "error": str(e)}
|
| 55 |
|
| 56 |
|
| 57 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
def run_queries_and_save_to_json(
|