yangdx committed on
Commit
53efba3
·
1 Parent(s): 5c5d027

Add document indexing during FastAPI startup; fix docx package name in requirements

Browse files

- Initialize DocumentManager earlier
- Add lifespan context manager
- Scan and index documents on startup
- Fix docx package name in requirements

Files changed (1) hide show
  1. lightrag/api/lightrag_server.py +28 -7
lightrag/api/lightrag_server.py CHANGED
@@ -574,6 +574,29 @@ def create_app(args):
574
  # Check if API key is provided either through env var or args
575
  api_key = os.getenv("LIGHTRAG_API_KEY") or args.key
576
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
  # Initialize FastAPI
578
  app = FastAPI(
579
  title="LightRAG API",
@@ -583,6 +606,7 @@ def create_app(args):
583
  else "",
584
  version=__api_version__,
585
  openapi_tags=[{"name": "api"}],
 
586
  )
587
 
588
  # Add CORS middleware
@@ -600,9 +624,6 @@ def create_app(args):
600
  # Create working directory if it doesn't exist
601
  Path(args.working_dir).mkdir(parents=True, exist_ok=True)
602
 
603
- # Initialize document manager
604
- doc_manager = DocumentManager(args.input_dir)
605
-
606
  async def openai_alike_model_complete(
607
  prompt,
608
  system_prompt=None,
@@ -737,8 +758,8 @@ def create_app(args):
737
  content += page.extract_text() + "\n"
738
 
739
  case ".docx":
740
- if not pm.is_installed("docx"):
741
- pm.install("docx")
742
  from docx import Document
743
 
744
  # Word document handling
@@ -971,8 +992,8 @@ def create_app(args):
971
  content += page.extract_text() + "\n"
972
 
973
  case ".docx":
974
- if not pm.is_installed("docx"):
975
- pm.install("docx")
976
  from docx import Document
977
  from io import BytesIO
978
 
 
574
  # Check if API key is provided either through env var or args
575
  api_key = os.getenv("LIGHTRAG_API_KEY") or args.key
576
 
577
+ # Initialize document manager
578
+ doc_manager = DocumentManager(args.input_dir)
579
+
580
+ @asynccontextmanager
581
+ async def lifespan(app: FastAPI):
582
+ """Lifespan context manager for startup and shutdown events"""
583
+ # Startup logic
584
+ try:
585
+ new_files = doc_manager.scan_directory()
586
+ for file_path in new_files:
587
+ try:
588
+ await index_file(file_path)
589
+ except Exception as e:
590
+ trace_exception(e)
591
+ logging.error(f"Error indexing file {file_path}: {str(e)}")
592
+
593
+ logging.info(f"Indexed {len(new_files)} documents from {args.input_dir}")
594
+ except Exception as e:
595
+ logging.error(f"Error during startup indexing: {str(e)}")
596
+ yield
597
+ # Cleanup logic (if needed)
598
+ pass
599
+
600
  # Initialize FastAPI
601
  app = FastAPI(
602
  title="LightRAG API",
 
606
  else "",
607
  version=__api_version__,
608
  openapi_tags=[{"name": "api"}],
609
+ lifespan=lifespan
610
  )
611
 
612
  # Add CORS middleware
 
624
  # Create working directory if it doesn't exist
625
  Path(args.working_dir).mkdir(parents=True, exist_ok=True)
626
 
 
 
 
627
  async def openai_alike_model_complete(
628
  prompt,
629
  system_prompt=None,
 
758
  content += page.extract_text() + "\n"
759
 
760
  case ".docx":
761
+ if not pm.is_installed("python-docx"):
762
+ pm.install("python-docx")
763
  from docx import Document
764
 
765
  # Word document handling
 
992
  content += page.extract_text() + "\n"
993
 
994
  case ".docx":
995
+ if not pm.is_installed("python-docx"):
996
+ pm.install("python-docx")
997
  from docx import Document
998
  from io import BytesIO
999