yangdx commited on
Commit
6dde218
·
1 Parent(s): 25287b8

Remove useless scan progress tracking functionality and related code

Browse files
lightrag/api/routers/document_routes.py CHANGED
@@ -4,7 +4,6 @@ This module contains all document-related routes for the LightRAG API.
4
 
5
  import asyncio
6
  import logging
7
- import os
8
  import aiofiles
9
  import shutil
10
  import traceback
@@ -12,17 +11,12 @@ import pipmaster as pm
12
  from datetime import datetime
13
  from pathlib import Path
14
  from typing import Dict, List, Optional, Any
15
- from ascii_colors import ASCIIColors
16
  from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
17
  from pydantic import BaseModel, Field, field_validator
18
 
19
  from lightrag import LightRAG
20
  from lightrag.base import DocProcessingStatus, DocStatus
21
  from ..utils_api import get_api_key_dependency
22
- from lightrag.kg.shared_storage import (
23
- get_namespace_data,
24
- get_storage_lock,
25
- )
26
 
27
 
28
  router = APIRouter(prefix="/documents", tags=["documents"])
@@ -376,72 +370,19 @@ async def save_temp_file(input_dir: Path, file: UploadFile = File(...)) -> Path:
376
 
377
  async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
378
  """Background task to scan and index documents"""
379
- scan_progress = get_namespace_data("scan_progress")
380
- scan_lock = get_storage_lock()
381
- with scan_lock:
382
- if scan_progress.get("is_scanning", False):
383
- ASCIIColors.info("Skip document scanning(another scanning is active)")
384
- return
385
- scan_progress.update(
386
- {
387
- "is_scanning": True,
388
- "current_file": "",
389
- "indexed_count": 0,
390
- "total_files": 0,
391
- "progress": 0,
392
- }
393
- )
394
-
395
  try:
396
  new_files = doc_manager.scan_directory_for_new_files()
397
  total_files = len(new_files)
398
- scan_progress.update(
399
- {
400
- "current_file": "",
401
- "total_files": total_files,
402
- "indexed_count": 0,
403
- "progress": 0,
404
- }
405
- )
406
-
407
  logging.info(f"Found {total_files} new files to index.")
 
408
  for idx, file_path in enumerate(new_files):
409
  try:
410
- progress = (idx / total_files * 100) if total_files > 0 else 0
411
- scan_progress.update(
412
- {
413
- "current_file": os.path.basename(file_path),
414
- "indexed_count": idx,
415
- "progress": progress,
416
- }
417
- )
418
-
419
  await pipeline_index_file(rag, file_path)
420
-
421
- progress = ((idx + 1) / total_files * 100) if total_files > 0 else 0
422
- scan_progress.update(
423
- {
424
- "current_file": os.path.basename(file_path),
425
- "indexed_count": idx + 1,
426
- "progress": progress,
427
- }
428
- )
429
-
430
  except Exception as e:
431
  logging.error(f"Error indexing file {file_path}: {str(e)}")
432
 
433
  except Exception as e:
434
  logging.error(f"Error during scanning process: {str(e)}")
435
- finally:
436
- scan_progress.update(
437
- {
438
- "is_scanning": False,
439
- "current_file": "",
440
- "indexed_count": 0,
441
- "total_files": 0,
442
- "progress": 0,
443
- }
444
- )
445
 
446
 
447
  def create_document_routes(
@@ -465,20 +406,6 @@ def create_document_routes(
465
  background_tasks.add_task(run_scanning_process, rag, doc_manager)
466
  return {"status": "scanning_started"}
467
 
468
- @router.get("/scan-progress")
469
- async def get_scanning_progress():
470
- """
471
- Get the current progress of the document scanning process.
472
-
473
- Returns:
474
- dict: A dictionary containing the current scanning progress information including:
475
- - is_scanning: Whether a scan is currently in progress
476
- - current_file: The file currently being processed
477
- - indexed_count: Number of files indexed so far
478
- - total_files: Total number of files to process
479
- - progress: Percentage of completion
480
- """
481
- return dict(get_namespace_data("scan_progress"))
482
 
483
  @router.post("/upload", dependencies=[Depends(optional_api_key)])
484
  async def upload_to_input_dir(
 
4
 
5
  import asyncio
6
  import logging
 
7
  import aiofiles
8
  import shutil
9
  import traceback
 
11
  from datetime import datetime
12
  from pathlib import Path
13
  from typing import Dict, List, Optional, Any
 
14
  from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
15
  from pydantic import BaseModel, Field, field_validator
16
 
17
  from lightrag import LightRAG
18
  from lightrag.base import DocProcessingStatus, DocStatus
19
  from ..utils_api import get_api_key_dependency
 
 
 
 
20
 
21
 
22
  router = APIRouter(prefix="/documents", tags=["documents"])
 
370
 
371
  async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
372
  """Background task to scan and index documents"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  try:
374
  new_files = doc_manager.scan_directory_for_new_files()
375
  total_files = len(new_files)
 
 
 
 
 
 
 
 
 
376
  logging.info(f"Found {total_files} new files to index.")
377
+
378
  for idx, file_path in enumerate(new_files):
379
  try:
 
 
 
 
 
 
 
 
 
380
  await pipeline_index_file(rag, file_path)
 
 
 
 
 
 
 
 
 
 
381
  except Exception as e:
382
  logging.error(f"Error indexing file {file_path}: {str(e)}")
383
 
384
  except Exception as e:
385
  logging.error(f"Error during scanning process: {str(e)}")
 
 
 
 
 
 
 
 
 
 
386
 
387
 
388
  def create_document_routes(
 
406
  background_tasks.add_task(run_scanning_process, rag, doc_manager)
407
  return {"status": "scanning_started"}
408
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
 
410
  @router.post("/upload", dependencies=[Depends(optional_api_key)])
411
  async def upload_to_input_dir(
lightrag/lightrag.py CHANGED
@@ -276,20 +276,7 @@ class LightRAG:
276
  try_initialize_namespace,
277
  get_namespace_data,
278
  )
279
-
280
  initialize_share_data()
281
- need_init = try_initialize_namespace("scan_progress")
282
- scan_progress = get_namespace_data("scan_progress")
283
- if need_init:
284
- scan_progress.update(
285
- {
286
- "is_scanning": False,
287
- "current_file": "",
288
- "indexed_count": 0,
289
- "total_files": 0,
290
- "progress": 0,
291
- }
292
- )
293
 
294
  if not os.path.exists(self.working_dir):
295
  logger.info(f"Creating working directory {self.working_dir}")
 
276
  try_initialize_namespace,
277
  get_namespace_data,
278
  )
 
279
  initialize_share_data()
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
  if not os.path.exists(self.working_dir):
282
  logger.info(f"Creating working directory {self.working_dir}")