yangdx committed on
Commit
1e7a838
·
2 Parent(s): ca8ab4c c4b77bf

Merge branch 'clear-text-before-insert' into simplify-cli-arguments

Browse files
lightrag/api/lightrag_server.py CHANGED
@@ -181,6 +181,8 @@ def create_app(args):
181
  "Skip document scanning(another scanning is active)"
182
  )
183
 
 
 
184
  yield
185
 
186
  finally:
 
181
  "Skip document scanning(another scanning is active)"
182
  )
183
 
184
+ ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
185
+
186
  yield
187
 
188
  finally:
lightrag/api/routers/document_routes.py CHANGED
@@ -161,7 +161,7 @@ class DocumentManager:
161
  """Scan input directory for new files"""
162
  new_files = []
163
  for ext in self.supported_extensions:
164
- logging.info(f"Scanning for {ext} files in {self.input_dir}")
165
  for file_path in self.input_dir.rglob(f"*{ext}"):
166
  if file_path not in self.indexed_files:
167
  new_files.append(file_path)
 
161
  """Scan input directory for new files"""
162
  new_files = []
163
  for ext in self.supported_extensions:
164
+ logging.debug(f"Scanning for {ext} files in {self.input_dir}")
165
  for file_path in self.input_dir.rglob(f"*{ext}"):
166
  if file_path not in self.indexed_files:
167
  new_files.append(file_path)
lightrag/api/utils_api.py CHANGED
@@ -492,7 +492,5 @@ def display_splash_screen(args: argparse.Namespace) -> None:
492
  Make sure to include the X-API-Key header in all your requests.
493
  """)
494
 
495
- ASCIIColors.green("Server is ready to accept connections! 🚀\n")
496
-
497
  # Ensure splash output flush to system log
498
  sys.stdout.flush()
 
492
  Make sure to include the X-API-Key header in all your requests.
493
  """)
494
 
 
 
495
  # Ensure splash output flush to system log
496
  sys.stdout.flush()
lightrag/lightrag.py CHANGED
@@ -581,7 +581,7 @@ class LightRAG:
581
  await self._insert_done()
582
 
583
  async def apipeline_enqueue_documents(
584
- self, input: str | list[str], ids: list[str] | None
585
  ) -> None:
586
  """
587
  Pipeline for Processing Documents
@@ -595,9 +595,6 @@ class LightRAG:
595
  if isinstance(input, str):
596
  input = [input]
597
 
598
- # Clean input text and remove duplicates
599
- input = list(set(self.clean_text(doc) for doc in input))
600
-
601
  # 1. Validate ids if provided or generate MD5 hash IDs
602
  if ids is not None:
603
  # Check if the number of IDs matches the number of documents
@@ -611,6 +608,8 @@ class LightRAG:
611
  # Generate contents dict of IDs provided by user and documents
612
  contents = {id_: doc for id_, doc in zip(ids, input)}
613
  else:
 
 
614
  # Generate contents dict of MD5 hash IDs and documents
615
  contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
616
 
 
581
  await self._insert_done()
582
 
583
  async def apipeline_enqueue_documents(
584
+ self, input: str | list[str], ids: list[str] | None = None
585
  ) -> None:
586
  """
587
  Pipeline for Processing Documents
 
595
  if isinstance(input, str):
596
  input = [input]
597
 
 
 
 
598
  # 1. Validate ids if provided or generate MD5 hash IDs
599
  if ids is not None:
600
  # Check if the number of IDs matches the number of documents
 
608
  # Generate contents dict of IDs provided by user and documents
609
  contents = {id_: doc for id_, doc in zip(ids, input)}
610
  else:
611
+ # Clean input text and remove duplicates
612
+ input = list(set(self.clean_text(doc) for doc in input))
613
  # Generate contents dict of MD5 hash IDs and documents
614
  contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
615