YanSte committed
Commit 9a3530b · 1 Parent(s): ab7ca21

cleaned type

Files changed (3)
  1. lightrag/lightrag.py +15 -6
  2. lightrag/operate.py +5 -6
  3. lightrag/utils.py +1 -1
lightrag/lightrag.py CHANGED
@@ -4,7 +4,7 @@ from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from functools import partial
-from typing import Any, Type, Union, cast
+from typing import Any, Callable, Optional, Type, Union, cast
 import traceback
 from .operate import (
     chunking_by_token_size,
@@ -177,13 +177,24 @@ class LightRAG:
 
     # extension
     addon_params: dict[str, Any] = field(default_factory=dict)
-    convert_response_to_json_func: callable = convert_response_to_json
+    convert_response_to_json_func: Callable[[str], dict[str, Any]] = convert_response_to_json
 
     # Add new field for document status storage type
     doc_status_storage: str = field(default="JsonDocStatusStorage")
 
     # Custom Chunking Function
-    chunking_func: callable = chunking_by_token_size
+    chunking_func: Callable[
+        [
+            str,
+            Optional[str],
+            bool,
+            int,
+            int,
+            str,
+        ],
+        list[dict[str, Any]],
+    ] = chunking_by_token_size
+
     chunking_func_kwargs: dict = field(default_factory=dict)
 
     def __post_init__(self):
@@ -538,9 +549,7 @@ class LightRAG:
             return
 
         full_docs_ids = await self.full_docs.get_by_ids(to_process_doc_keys)
-        new_docs = {}
-        if full_docs_ids:
-            new_docs = {doc["id"]: doc for doc in full_docs_ids or []}
+        new_docs = {doc["id"]: doc for doc in full_docs_ids or []}
 
         if not new_docs:
             logger.info("All documents have been processed or are duplicates")
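
The new `Callable` annotation documents the exact call shape a replacement chunker must satisfy. Below is a minimal sketch of a compatible custom chunker; the name chunk_by_blank_lines, its splitting strategy, and the assumption that its output dicts mirror the tokens/content/chunk_order_index keys of chunking_by_token_size are illustrative, not part of this commit.

from typing import Any, Optional

def chunk_by_blank_lines(
    content: str,
    split_by_character: Optional[str] = None,
    split_by_character_only: bool = False,
    overlap_token_size: int = 128,
    max_token_size: int = 1024,
    tiktoken_model: str = "gpt-4o",
) -> list[dict[str, Any]]:
    # Illustrative strategy: split on blank lines and ignore the
    # token-window parameters entirely.
    paragraphs = [p.strip() for p in content.split("\n\n") if p.strip()]
    return [
        {"tokens": len(p.split()), "content": p, "chunk_order_index": i}
        for i, p in enumerate(paragraphs)
    ]

# rag = LightRAG(chunking_func=chunk_by_blank_lines)  # other constructor args omitted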
lightrag/operate.py CHANGED
@@ -36,12 +36,11 @@ import time
 
 def chunking_by_token_size(
     content: str,
-    split_by_character=None,
-    split_by_character_only=False,
-    overlap_token_size=128,
-    max_token_size=1024,
-    tiktoken_model="gpt-4o",
-    **kwargs,
+    split_by_character: Union[str, None] = None,
+    split_by_character_only: bool = False,
+    overlap_token_size: int = 128,
+    max_token_size: int = 1024,
+    tiktoken_model: str = "gpt-4o",
 ) -> list[dict[str, Any]]:
     tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
     results: list[dict[str, Any]] = []
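
Dropping **kwargs means misspelled or unsupported keyword arguments now raise TypeError at the call site instead of being silently swallowed, and the annotations give type checkers something concrete to verify. A minimal usage sketch, assuming tiktoken is installed (the function tokenizes via encode_string_by_tiktoken with the gpt-4o encoding):

from lightrag.operate import chunking_by_token_size

chunks = chunking_by_token_size(
    "First paragraph.\n\nSecond paragraph.",
    split_by_character="\n\n",
    overlap_token_size=16,
    max_token_size=256,
)
for chunk in chunks:
    print(chunk)

# Before this commit the next call silently ignored the typo; now it fails
# immediately with TypeError: unexpected keyword argument 'max_tokens'.
# chunking_by_token_size("text", max_tokens=256)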
lightrag/utils.py CHANGED
@@ -98,7 +98,7 @@ def locate_json_string_body_from_string(content: str) -> Union[str, None]:
     return None
 
 
-def convert_response_to_json(response: str) -> dict:
+def convert_response_to_json(response: str) -> dict[str, Any]:
     json_str = locate_json_string_body_from_string(response)
     assert json_str is not None, f"Unable to parse JSON from response: {response}"
     try:
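
The tightened return annotation matches the new Callable[[str], dict[str, Any]] type of convert_response_to_json_func, so custom converters can be checked against the same shape. A hypothetical drop-in replacement; the strict json.loads parsing below is my assumption for illustration, not this module's behavior:

import json
from typing import Any

def strict_convert_response_to_json(response: str) -> dict[str, Any]:
    # Hypothetical variant: parse the whole response strictly instead of
    # locating a JSON body inside free-form text.
    parsed = json.loads(response)
    if not isinstance(parsed, dict):
        raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
    return parsed

# rag = LightRAG(convert_response_to_json_func=strict_convert_response_to_json)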