cleaned type
Browse files- lightrag/lightrag.py +15 -6
- lightrag/operate.py +5 -6
- lightrag/utils.py +1 -1
lightrag/lightrag.py
CHANGED
@@ -4,7 +4,7 @@ from tqdm.asyncio import tqdm as tqdm_async
|
|
4 |
from dataclasses import asdict, dataclass, field
|
5 |
from datetime import datetime
|
6 |
from functools import partial
|
7 |
-
from typing import Any, Type, Union, cast
|
8 |
import traceback
|
9 |
from .operate import (
|
10 |
chunking_by_token_size,
|
@@ -177,13 +177,24 @@ class LightRAG:
|
|
177 |
|
178 |
# extension
|
179 |
addon_params: dict[str, Any] = field(default_factory=dict)
|
180 |
-
convert_response_to_json_func: callable = convert_response_to_json
|
181 |
|
182 |
# Add new field for document status storage type
|
183 |
doc_status_storage: str = field(default="JsonDocStatusStorage")
|
184 |
|
185 |
# Custom Chunking Function
|
186 |
-
chunking_func: callable = chunking_by_token_size
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
chunking_func_kwargs: dict = field(default_factory=dict)
|
188 |
|
189 |
def __post_init__(self):
|
@@ -538,9 +549,7 @@ class LightRAG:
|
|
538 |
return
|
539 |
|
540 |
full_docs_ids = await self.full_docs.get_by_ids(to_process_doc_keys)
|
541 |
-
new_docs = {}
|
542 |
-
if full_docs_ids:
|
543 |
-
new_docs = {doc["id"]: doc for doc in full_docs_ids or []}
|
544 |
|
545 |
if not new_docs:
|
546 |
logger.info("All documents have been processed or are duplicates")
|
|
|
4 |
from dataclasses import asdict, dataclass, field
|
5 |
from datetime import datetime
|
6 |
from functools import partial
|
7 |
+
from typing import Any, Callable, Optional, Type, Union, cast
|
8 |
import traceback
|
9 |
from .operate import (
|
10 |
chunking_by_token_size,
|
|
|
177 |
|
178 |
# extension
|
179 |
addon_params: dict[str, Any] = field(default_factory=dict)
|
180 |
+
convert_response_to_json_func: Callable[[str], dict[str, Any]] = convert_response_to_json
|
181 |
|
182 |
# Add new field for document status storage type
|
183 |
doc_status_storage: str = field(default="JsonDocStatusStorage")
|
184 |
|
185 |
# Custom Chunking Function
|
186 |
+
chunking_func: Callable[
|
187 |
+
[
|
188 |
+
str,
|
189 |
+
Optional[str],
|
190 |
+
bool,
|
191 |
+
int,
|
192 |
+
int,
|
193 |
+
str,
|
194 |
+
],
|
195 |
+
list[dict[str, Any]],
|
196 |
+
] = chunking_by_token_size
|
197 |
+
|
198 |
chunking_func_kwargs: dict = field(default_factory=dict)
|
199 |
|
200 |
def __post_init__(self):
|
|
|
549 |
return
|
550 |
|
551 |
full_docs_ids = await self.full_docs.get_by_ids(to_process_doc_keys)
|
552 |
+
new_docs = {doc["id"]: doc for doc in full_docs_ids or []}
|
|
|
|
|
553 |
|
554 |
if not new_docs:
|
555 |
logger.info("All documents have been processed or are duplicates")
|
lightrag/operate.py
CHANGED
@@ -36,12 +36,11 @@ import time
|
|
36 |
|
37 |
def chunking_by_token_size(
|
38 |
content: str,
|
39 |
-
split_by_character=None,
|
40 |
-
split_by_character_only=False,
|
41 |
-
overlap_token_size=128,
|
42 |
-
max_token_size=1024,
|
43 |
-
tiktoken_model="gpt-4o",
|
44 |
-
**kwargs,
|
45 |
) -> list[dict[str, Any]]:
|
46 |
tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
|
47 |
results: list[dict[str, Any]] = []
|
|
|
36 |
|
37 |
def chunking_by_token_size(
|
38 |
content: str,
|
39 |
+
split_by_character: Union[str, None] = None,
|
40 |
+
split_by_character_only: bool = False,
|
41 |
+
overlap_token_size: int = 128,
|
42 |
+
max_token_size: int = 1024,
|
43 |
+
tiktoken_model: str = "gpt-4o",
|
|
|
44 |
) -> list[dict[str, Any]]:
|
45 |
tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
|
46 |
results: list[dict[str, Any]] = []
|
lightrag/utils.py
CHANGED
@@ -98,7 +98,7 @@ def locate_json_string_body_from_string(content: str) -> Union[str, None]:
|
|
98 |
return None
|
99 |
|
100 |
|
101 |
-
def convert_response_to_json(response: str) -> dict:
|
102 |
json_str = locate_json_string_body_from_string(response)
|
103 |
assert json_str is not None, f"Unable to parse JSON from response: {response}"
|
104 |
try:
|
|
|
98 |
return None
|
99 |
|
100 |
|
101 |
+
def convert_response_to_json(response: str) -> dict[str, Any]:
|
102 |
json_str = locate_json_string_body_from_string(response)
|
103 |
assert json_str is not None, f"Unable to parse JSON from response: {response}"
|
104 |
try:
|