Improved empty/whitespace file handling
Browse files
- Better detection of whitespace-only files
- Changed error to warning for empty chunks
lightrag/api/routers/document_routes.py
CHANGED
@@ -714,9 +714,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|
714 |
|
715 |
# Insert into the RAG queue
|
716 |
if content:
|
717 |
-
|
718 |
-
if content
|
719 |
-
logger.
|
720 |
|
721 |
await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
|
722 |
logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
|
|
|
714 |
|
715 |
# Insert into the RAG queue
|
716 |
if content:
|
717 |
+
# Check if content contains only whitespace characters
|
718 |
+
if not content.strip():
|
719 |
+
logger.warning(f"File contains only whitespace characters. file_paths={file_path.name}")
|
720 |
|
721 |
await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
|
722 |
logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
|
lightrag/lightrag.py
CHANGED
@@ -966,7 +966,7 @@ class LightRAG:
|
|
966 |
}
|
967 |
|
968 |
if not chunks:
|
969 |
-
logger.error("No document chunks to process")
|
970 |
|
971 |
# Process document in two stages
|
972 |
# Stage 1: Process text chunks and docs (parallel execution)
|
|
|
966 |
}
|
967 |
|
968 |
if not chunks:
|
969 |
+
logger.warning("No document chunks to process")
|
970 |
|
971 |
# Process document in two stages
|
972 |
# Stage 1: Process text chunks and docs (parallel execution)
|