Merge branch 'clear-text-before-insert' into simplify-cli-arguments
Browse files
lightrag/api/lightrag_server.py
CHANGED
@@ -181,6 +181,8 @@ def create_app(args):
|
|
181 |
"Skip document scanning(another scanning is active)"
|
182 |
)
|
183 |
|
|
|
|
|
184 |
yield
|
185 |
|
186 |
finally:
|
|
|
181 |
"Skip document scanning(another scanning is active)"
|
182 |
)
|
183 |
|
184 |
+
ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
|
185 |
+
|
186 |
yield
|
187 |
|
188 |
finally:
|
lightrag/api/routers/document_routes.py
CHANGED
@@ -161,7 +161,7 @@ class DocumentManager:
|
|
161 |
"""Scan input directory for new files"""
|
162 |
new_files = []
|
163 |
for ext in self.supported_extensions:
|
164 |
-
logging.
|
165 |
for file_path in self.input_dir.rglob(f"*{ext}"):
|
166 |
if file_path not in self.indexed_files:
|
167 |
new_files.append(file_path)
|
|
|
161 |
"""Scan input directory for new files"""
|
162 |
new_files = []
|
163 |
for ext in self.supported_extensions:
|
164 |
+
logging.debug(f"Scanning for {ext} files in {self.input_dir}")
|
165 |
for file_path in self.input_dir.rglob(f"*{ext}"):
|
166 |
if file_path not in self.indexed_files:
|
167 |
new_files.append(file_path)
|
lightrag/api/utils_api.py
CHANGED
@@ -492,7 +492,5 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
|
492 |
Make sure to include the X-API-Key header in all your requests.
|
493 |
""")
|
494 |
|
495 |
-
ASCIIColors.green("Server is ready to accept connections! 🚀\n")
|
496 |
-
|
497 |
# Ensure splash output flush to system log
|
498 |
sys.stdout.flush()
|
|
|
492 |
Make sure to include the X-API-Key header in all your requests.
|
493 |
""")
|
494 |
|
|
|
|
|
495 |
# Ensure splash output flush to system log
|
496 |
sys.stdout.flush()
|
lightrag/lightrag.py
CHANGED
@@ -581,7 +581,7 @@ class LightRAG:
|
|
581 |
await self._insert_done()
|
582 |
|
583 |
async def apipeline_enqueue_documents(
|
584 |
-
self, input: str | list[str], ids: list[str] | None
|
585 |
) -> None:
|
586 |
"""
|
587 |
Pipeline for Processing Documents
|
@@ -595,9 +595,6 @@ class LightRAG:
|
|
595 |
if isinstance(input, str):
|
596 |
input = [input]
|
597 |
|
598 |
-
# Clean input text and remove duplicates
|
599 |
-
input = list(set(self.clean_text(doc) for doc in input))
|
600 |
-
|
601 |
# 1. Validate ids if provided or generate MD5 hash IDs
|
602 |
if ids is not None:
|
603 |
# Check if the number of IDs matches the number of documents
|
@@ -611,6 +608,8 @@ class LightRAG:
|
|
611 |
# Generate contents dict of IDs provided by user and documents
|
612 |
contents = {id_: doc for id_, doc in zip(ids, input)}
|
613 |
else:
|
|
|
|
|
614 |
# Generate contents dict of MD5 hash IDs and documents
|
615 |
contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
|
616 |
|
|
|
581 |
await self._insert_done()
|
582 |
|
583 |
async def apipeline_enqueue_documents(
|
584 |
+
self, input: str | list[str], ids: list[str] | None = None
|
585 |
) -> None:
|
586 |
"""
|
587 |
Pipeline for Processing Documents
|
|
|
595 |
if isinstance(input, str):
|
596 |
input = [input]
|
597 |
|
|
|
|
|
|
|
598 |
# 1. Validate ids if provided or generate MD5 hash IDs
|
599 |
if ids is not None:
|
600 |
# Check if the number of IDs matches the number of documents
|
|
|
608 |
# Generate contents dict of IDs provided by user and documents
|
609 |
contents = {id_: doc for id_, doc in zip(ids, input)}
|
610 |
else:
|
611 |
+
# Clean input text and remove duplicates
|
612 |
+
input = list(set(self.clean_text(doc) for doc in input))
|
613 |
# Generate contents dict of MD5 hash IDs and documents
|
614 |
contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
|
615 |
|