yangdx committed
Commit: 6dde218 · Parent(s): 25287b8
Remove useless scan progress tracking functionality and related code
- lightrag/api/routers/document_routes.py  +1 -74
- lightrag/lightrag.py  +0 -13
lightrag/api/routers/document_routes.py
CHANGED
@@ -4,7 +4,6 @@ This module contains all document-related routes for the LightRAG API.
 
 import asyncio
 import logging
-import os
 import aiofiles
 import shutil
 import traceback
@@ -12,17 +11,12 @@ import pipmaster as pm
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Any
-from ascii_colors import ASCIIColors
 from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
 from pydantic import BaseModel, Field, field_validator
 
 from lightrag import LightRAG
 from lightrag.base import DocProcessingStatus, DocStatus
 from ..utils_api import get_api_key_dependency
-from lightrag.kg.shared_storage import (
-    get_namespace_data,
-    get_storage_lock,
-)
 
 
 router = APIRouter(prefix="/documents", tags=["documents"])
@@ -376,72 +370,19 @@ async def save_temp_file(input_dir: Path, file: UploadFile = File(...)) -> Path:
 
 async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
     """Background task to scan and index documents"""
-    scan_progress = get_namespace_data("scan_progress")
-    scan_lock = get_storage_lock()
-    with scan_lock:
-        if scan_progress.get("is_scanning", False):
-            ASCIIColors.info("Skip document scanning(another scanning is active)")
-            return
-        scan_progress.update(
-            {
-                "is_scanning": True,
-                "current_file": "",
-                "indexed_count": 0,
-                "total_files": 0,
-                "progress": 0,
-            }
-        )
-
     try:
         new_files = doc_manager.scan_directory_for_new_files()
         total_files = len(new_files)
-        scan_progress.update(
-            {
-                "current_file": "",
-                "total_files": total_files,
-                "indexed_count": 0,
-                "progress": 0,
-            }
-        )
-
         logging.info(f"Found {total_files} new files to index.")
+
         for idx, file_path in enumerate(new_files):
             try:
-                progress = (idx / total_files * 100) if total_files > 0 else 0
-                scan_progress.update(
-                    {
-                        "current_file": os.path.basename(file_path),
-                        "indexed_count": idx,
-                        "progress": progress,
-                    }
-                )
-
                 await pipeline_index_file(rag, file_path)
-
-                progress = ((idx + 1) / total_files * 100) if total_files > 0 else 0
-                scan_progress.update(
-                    {
-                        "current_file": os.path.basename(file_path),
-                        "indexed_count": idx + 1,
-                        "progress": progress,
-                    }
-                )
-
             except Exception as e:
                 logging.error(f"Error indexing file {file_path}: {str(e)}")
 
     except Exception as e:
         logging.error(f"Error during scanning process: {str(e)}")
-    finally:
-        scan_progress.update(
-            {
-                "is_scanning": False,
-                "current_file": "",
-                "indexed_count": 0,
-                "total_files": 0,
-                "progress": 0,
-            }
-        )
 
 
 def create_document_routes(
@@ -465,20 +406,6 @@ def create_document_routes(
         background_tasks.add_task(run_scanning_process, rag, doc_manager)
         return {"status": "scanning_started"}
 
-    @router.get("/scan-progress")
-    async def get_scanning_progress():
-        """
-        Get the current progress of the document scanning process.
-
-        Returns:
-            dict: A dictionary containing the current scanning progress information including:
-                - is_scanning: Whether a scan is currently in progress
-                - current_file: The file currently being processed
-                - indexed_count: Number of files indexed so far
-                - total_files: Total number of files to process
-                - progress: Percentage of completion
-        """
-        return dict(get_namespace_data("scan_progress"))
 
     @router.post("/upload", dependencies=[Depends(optional_api_key)])
     async def upload_to_input_dir(
lightrag/lightrag.py
CHANGED
@@ -276,20 +276,7 @@ class LightRAG:
             try_initialize_namespace,
             get_namespace_data,
         )
-
         initialize_share_data()
-        need_init = try_initialize_namespace("scan_progress")
-        scan_progress = get_namespace_data("scan_progress")
-        if need_init:
-            scan_progress.update(
-                {
-                    "is_scanning": False,
-                    "current_file": "",
-                    "indexed_count": 0,
-                    "total_files": 0,
-                    "progress": 0,
-                }
-            )
 
         if not os.path.exists(self.working_dir):
             logger.info(f"Creating working directory {self.working_dir}")