gzdaniel committed on
Commit
e3fc792
·
1 Parent(s): 158a79c

Improved empty/whitespace file handling

Browse files

- Better detection of whitespace-only files
- Changed error to warning for empty chunks

lightrag/api/routers/document_routes.py CHANGED
@@ -714,9 +714,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
714
 
715
  # Insert into the RAG queue
716
  if content:
717
-
718
- if content == "\n":
719
- logger.info(f"File appears to be empty. file_paths={file_path.name}")
720
 
721
  await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
722
  logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
 
714
 
715
  # Insert into the RAG queue
716
  if content:
717
+ # Check if content contains only whitespace characters
718
+ if not content.strip():
719
+ logger.warning(f"File contains only whitespace characters. file_paths={file_path.name}")
720
 
721
  await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
722
  logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
lightrag/lightrag.py CHANGED
@@ -966,7 +966,7 @@ class LightRAG:
966
  }
967
 
968
  if not chunks:
969
- logger.error("No document chunks to process")
970
 
971
  # Process document in two stages
972
  # Stage 1: Process text chunks and docs (parallel execution)
 
966
  }
967
 
968
  if not chunks:
969
+ logger.warning("No document chunks to process")
970
 
971
  # Process document in two stages
972
  # Stage 1: Process text chunks and docs (parallel execution)