yangdx
committed on
Commit
·
109988a
1
Parent(s):
648da0e
Add Unicode collation for sorting Chinese file names during document scanning
Browse files
lightrag/api/requirements.txt
CHANGED
@@ -16,6 +16,7 @@ python-dotenv
|
|
16 |
python-jose[cryptography]
|
17 |
python-multipart
|
18 |
pytz
|
|
|
19 |
tenacity
|
20 |
tiktoken
|
21 |
uvicorn
|
|
|
16 |
python-jose[cryptography]
|
17 |
python-multipart
|
18 |
pytz
|
19 |
+
pyuca
|
20 |
tenacity
|
21 |
tiktoken
|
22 |
uvicorn
|
lightrag/api/routers/document_routes.py
CHANGED
@@ -3,6 +3,7 @@ This module contains all document-related routes for the LightRAG API.
|
|
3 |
"""
|
4 |
|
5 |
import asyncio
|
|
|
6 |
from lightrag.utils import logger
|
7 |
import aiofiles
|
8 |
import shutil
|
@@ -614,8 +615,12 @@ async def pipeline_index_files(rag: LightRAG, file_paths: List[Path]):
|
|
614 |
try:
|
615 |
enqueued = False
|
616 |
|
|
|
|
|
|
|
|
|
617 |
# Process files sequentially
|
618 |
-
for file_path in
|
619 |
if await pipeline_enqueue_file(rag, file_path):
|
620 |
enqueued = True
|
621 |
|
|
|
3 |
"""
|
4 |
|
5 |
import asyncio
|
6 |
+
from pyuca import Collator
|
7 |
from lightrag.utils import logger
|
8 |
import aiofiles
|
9 |
import shutil
|
|
|
615 |
try:
|
616 |
enqueued = False
|
617 |
|
618 |
+
# Create Collator for Unicode sorting
|
619 |
+
collator = Collator()
|
620 |
+
sorted_file_paths = sorted(file_paths, key=lambda p: collator.sort_key(str(p)))
|
621 |
+
|
622 |
# Process files sequentially
|
623 |
+
for file_path in sorted_file_paths:
|
624 |
if await pipeline_enqueue_file(rag, file_path):
|
625 |
enqueued = True
|
626 |
|
requirements.txt
CHANGED
@@ -11,6 +11,9 @@ pipmaster
|
|
11 |
pydantic
|
12 |
python-dotenv
|
13 |
|
|
|
|
|
|
|
14 |
setuptools
|
15 |
tenacity
|
16 |
|
|
|
11 |
pydantic
|
12 |
python-dotenv
|
13 |
|
14 |
+
# Unicode Collation Algorithm for proper Chinese sorting
|
15 |
+
pyuca
|
16 |
+
|
17 |
setuptools
|
18 |
tenacity
|
19 |
|