Milin committed on
Commit
d98331c
·
2 Parent(s): f69969b 987b359

Merge branch 'main' into optimize-config-management


# Conflicts:
# env.example
# lightrag/api/utils_api.py

README.md CHANGED
@@ -441,11 +441,16 @@ if __name__ == "__main__":
441
  - [Direct OpenAI Example](examples/lightrag_llamaindex_direct_demo.py)
442
  - [LiteLLM Proxy Example](examples/lightrag_llamaindex_litellm_demo.py)
443
 
 
 
444
  ### Conversation History Support
445
 
446
 
447
  LightRAG now supports multi-turn dialogue through the conversation history feature. Here's how to use it:
448
 
 
 
 
449
  ```python
450
  # Create conversation history
451
  conversation_history = [
@@ -468,10 +473,15 @@ response = rag.query(
468
  )
469
  ```
470
 
 
 
471
  ### Custom Prompt Support
472
 
473
  LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it:
474
 
 
 
 
475
  ```python
476
  # Create query parameters
477
  query_param = QueryParam(
@@ -506,6 +516,8 @@ response_custom = rag.query(
506
  print(response_custom)
507
  ```
508
 
 
 
509
  ### Separate Keyword Extraction
510
 
511
  We've introduced a new function `query_with_separate_keyword_extraction` to enhance the keyword extraction capabilities. This function separates the keyword extraction process from the user's prompt, focusing solely on the query to improve the relevance of extracted keywords.
@@ -519,7 +531,8 @@ The function operates by dividing the input into two parts:
519
 
520
  It then performs keyword extraction exclusively on the `user query`. This separation ensures that the extraction process is focused and relevant, unaffected by any additional language in the `prompt`. It also allows the `prompt` to serve purely for response formatting, maintaining the intent and clarity of the user's original question.
521
 
522
- **Usage Example**
 
523
 
524
This example shows how to tailor the function for educational content, focusing on detailed explanations for older students.
525
 
@@ -531,67 +544,6 @@ rag.query_with_separate_keyword_extraction(
531
  )
532
  ```
533
 
534
- ### Insert Custom KG
535
-
536
- ```python
537
- custom_kg = {
538
- "chunks": [
539
- {
540
- "content": "Alice and Bob are collaborating on quantum computing research.",
541
- "source_id": "doc-1"
542
- }
543
- ],
544
- "entities": [
545
- {
546
- "entity_name": "Alice",
547
- "entity_type": "person",
548
- "description": "Alice is a researcher specializing in quantum physics.",
549
- "source_id": "doc-1"
550
- },
551
- {
552
- "entity_name": "Bob",
553
- "entity_type": "person",
554
- "description": "Bob is a mathematician.",
555
- "source_id": "doc-1"
556
- },
557
- {
558
- "entity_name": "Quantum Computing",
559
- "entity_type": "technology",
560
- "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
561
- "source_id": "doc-1"
562
- }
563
- ],
564
- "relationships": [
565
- {
566
- "src_id": "Alice",
567
- "tgt_id": "Bob",
568
- "description": "Alice and Bob are research partners.",
569
- "keywords": "collaboration research",
570
- "weight": 1.0,
571
- "source_id": "doc-1"
572
- },
573
- {
574
- "src_id": "Alice",
575
- "tgt_id": "Quantum Computing",
576
- "description": "Alice conducts research on quantum computing.",
577
- "keywords": "research expertise",
578
- "weight": 1.0,
579
- "source_id": "doc-1"
580
- },
581
- {
582
- "src_id": "Bob",
583
- "tgt_id": "Quantum Computing",
584
- "description": "Bob researches quantum computing.",
585
- "keywords": "research application",
586
- "weight": 1.0,
587
- "source_id": "doc-1"
588
- }
589
- ]
590
- }
591
-
592
- rag.insert_custom_kg(custom_kg)
593
- ```
594
-
595
  </details>
596
 
597
  ## Insert
@@ -683,6 +635,70 @@ rag.insert(text_content.decode('utf-8'))
683
 
684
  </details>
685
 
686
  <details>
687
  <summary><b>Citation Functionality</b></summary>
688
 
@@ -842,7 +858,8 @@ rag.delete_by_doc_id("doc_id")
842
 
843
  LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
844
 
845
- ### Create Entities and Relations
 
846
 
847
  ```python
848
  # Create new entity
@@ -865,7 +882,10 @@ relation = rag.create_relation("Google", "Gmail", {
865
  })
866
  ```
867
 
868
- ### Edit Entities and Relations
 
 
 
869
 
870
  ```python
871
  # Edit an existing entity
@@ -902,6 +922,8 @@ All operations are available in both synchronous and asynchronous versions. The
902
 
903
  These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
904
 
 
 
905
  ## Data Export Functions
906
 
907
  ### Overview
@@ -910,7 +932,8 @@ LightRAG allows you to export your knowledge graph data in various formats for a
910
 
911
  ### Export Functions
912
 
913
- #### Basic Usage
 
914
 
915
  ```python
916
  # Basic CSV export (default format)
@@ -920,7 +943,10 @@ rag.export_data("knowledge_graph.csv")
920
  rag.export_data("output.xlsx", file_format="excel")
921
  ```
922
 
923
- #### Different File Formats supported
 
 
 
924
 
925
  ```python
926
# Export data in CSV format
@@ -935,13 +961,18 @@ rag.export_data("graph_data.md", file_format="md")
935
  # Export data in Text
936
  rag.export_data("graph_data.txt", file_format="txt")
937
  ```
938
- #### Additional Options
 
 
 
939
 
940
  Include vector embeddings in the export (optional):
941
 
942
  ```python
943
  rag.export_data("complete_data.csv", include_vector_data=True)
944
  ```
 
 
945
  ### Data Included in Export
946
 
947
  All exports include:
 
441
  - [Direct OpenAI Example](examples/lightrag_llamaindex_direct_demo.py)
442
  - [LiteLLM Proxy Example](examples/lightrag_llamaindex_litellm_demo.py)
443
 
444
+ </details>
445
+
446
  ### Conversation History Support
447
 
448
 
449
  LightRAG now supports multi-turn dialogue through the conversation history feature. Here's how to use it:
450
 
451
+ <details>
452
+ <summary><b>Usage Example</b></summary>
453
+
454
  ```python
455
  # Create conversation history
456
  conversation_history = [
 
473
  )
474
  ```
475
 
476
+ </details>
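For reference, a minimal sketch of this pattern (assuming an initialized `rag` instance; the message contents and the `mode`/`history_turns` values are illustrative):

```python
from lightrag import QueryParam

# Conversation history: a list of {"role", "content"} dicts (illustrative)
conversation_history = [
    {"role": "user", "content": "What is the main character's attitude towards Christmas?"},
    {"role": "assistant", "content": "At the start of the story he is dismissive of Christmas."},
    {"role": "user", "content": "How does his attitude change?"},
]

# history_turns limits how many recent turns are considered
query_param = QueryParam(
    mode="mix",
    conversation_history=conversation_history,
    history_turns=3,
)

response = rag.query(
    "What causes this change in his character?",
    param=query_param,
)
```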
477
+
478
  ### Custom Prompt Support
479
 
480
  LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it:
481
 
482
+ <details>
483
+ <summary><b>Usage Example</b></summary>
484
+
485
  ```python
486
  # Create query parameters
487
  query_param = QueryParam(
 
516
  print(response_custom)
517
  ```
518
 
519
+ </details>
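A minimal sketch of the custom-prompt pattern (assuming an initialized `rag` instance, and that `{context_data}` is the placeholder LightRAG fills with retrieved context):

```python
from lightrag import QueryParam

query_param = QueryParam(mode="hybrid")

# Custom system prompt; the {context_data} placeholder is an assumption
# based on LightRAG's default prompt template
custom_prompt = """
You are an expert assistant in environmental science.
Provide detailed and structured answers with examples.

---Knowledge Base---
{context_data}
"""

response_custom = rag.query(
    "What are the primary benefits of renewable energy?",
    param=query_param,
    system_prompt=custom_prompt,  # overrides the default system prompt
)
print(response_custom)
```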
520
+
521
  ### Separate Keyword Extraction
522
 
523
  We've introduced a new function `query_with_separate_keyword_extraction` to enhance the keyword extraction capabilities. This function separates the keyword extraction process from the user's prompt, focusing solely on the query to improve the relevance of extracted keywords.
 
531
 
532
  It then performs keyword extraction exclusively on the `user query`. This separation ensures that the extraction process is focused and relevant, unaffected by any additional language in the `prompt`. It also allows the `prompt` to serve purely for response formatting, maintaining the intent and clarity of the user's original question.
533
 
534
+ <details>
535
+ <summary><b>Usage Example</b></summary>
536
 
537
This example shows how to tailor the function for educational content, focusing on detailed explanations for older students.
538
 
 
544
  )
545
  ```
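A minimal sketch of such a call (argument values are illustrative; keywords are extracted from `query` only, while `prompt` shapes the response format):

```python
from lightrag import QueryParam

response = rag.query_with_separate_keyword_extraction(
    query="Explain the law of gravity",  # keyword extraction runs on this only
    prompt="Provide a detailed explanation suitable for high school students studying physics.",
    param=QueryParam(mode="hybrid"),
)
print(response)
```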
546
 
547
  </details>
548
 
549
  ## Insert
 
635
 
636
  </details>
637
 
638
+ <details>
639
+ <summary><b>Insert Custom KG</b></summary>
640
+
641
+ ```python
642
+ custom_kg = {
643
+ "chunks": [
644
+ {
645
+ "content": "Alice and Bob are collaborating on quantum computing research.",
646
+ "source_id": "doc-1"
647
+ }
648
+ ],
649
+ "entities": [
650
+ {
651
+ "entity_name": "Alice",
652
+ "entity_type": "person",
653
+ "description": "Alice is a researcher specializing in quantum physics.",
654
+ "source_id": "doc-1"
655
+ },
656
+ {
657
+ "entity_name": "Bob",
658
+ "entity_type": "person",
659
+ "description": "Bob is a mathematician.",
660
+ "source_id": "doc-1"
661
+ },
662
+ {
663
+ "entity_name": "Quantum Computing",
664
+ "entity_type": "technology",
665
+ "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
666
+ "source_id": "doc-1"
667
+ }
668
+ ],
669
+ "relationships": [
670
+ {
671
+ "src_id": "Alice",
672
+ "tgt_id": "Bob",
673
+ "description": "Alice and Bob are research partners.",
674
+ "keywords": "collaboration research",
675
+ "weight": 1.0,
676
+ "source_id": "doc-1"
677
+ },
678
+ {
679
+ "src_id": "Alice",
680
+ "tgt_id": "Quantum Computing",
681
+ "description": "Alice conducts research on quantum computing.",
682
+ "keywords": "research expertise",
683
+ "weight": 1.0,
684
+ "source_id": "doc-1"
685
+ },
686
+ {
687
+ "src_id": "Bob",
688
+ "tgt_id": "Quantum Computing",
689
+ "description": "Bob researches quantum computing.",
690
+ "keywords": "research application",
691
+ "weight": 1.0,
692
+ "source_id": "doc-1"
693
+ }
694
+ ]
695
+ }
696
+
697
+ rag.insert_custom_kg(custom_kg)
698
+ ```
699
+
700
+ </details>
701
+
702
  <details>
703
  <summary><b>Citation Functionality</b></summary>
704
 
 
858
 
859
  LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
860
 
861
+ <details>
862
+ <summary><b>Create Entities and Relations</b></summary>
863
 
864
  ```python
865
  # Create new entity
 
882
  })
883
  ```
884
 
885
+ </details>
886
+
887
+ <details>
888
+ <summary><b>Edit Entities and Relations</b></summary>
889
 
890
  ```python
891
  # Edit an existing entity
 
922
 
923
  These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
924
 
925
+ </details>
926
+
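As a quick sketch of the asynchronous variants mentioned above (the `a`-prefixed method names are assumptions based on that convention):

```python
import asyncio

async def manage_kg(rag):
    # Assumed async counterparts of the sync create/edit/delete methods
    await rag.acreate_entity("Google", {
        "description": "Google is a multinational technology company",
        "entity_type": "company",
    })
    await rag.aedit_entity("Google", {
        "description": "Updated description of Google",
    })
    await rag.adelete_by_entity("Google")  # removes the entity and its relations

asyncio.run(manage_kg(rag))
```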
927
  ## Data Export Functions
928
 
929
  ### Overview
 
932
 
933
  ### Export Functions
934
 
935
+ <details>
936
+ <summary><b>Basic Usage</b></summary>
937
 
938
  ```python
939
  # Basic CSV export (default format)
 
943
  rag.export_data("output.xlsx", file_format="excel")
944
  ```
945
 
946
+ </details>
947
+
948
+ <details>
949
+ <summary><b>Different File Formats Supported</b></summary>
950
 
951
  ```python
952
# Export data in CSV format
 
961
  # Export data in Text
962
  rag.export_data("graph_data.txt", file_format="txt")
963
  ```
964
+ </details>
965
+
966
+ <details>
967
+ <summary><b>Additional Options</b></summary>
968
 
969
  Include vector embeddings in the export (optional):
970
 
971
  ```python
972
  rag.export_data("complete_data.csv", include_vector_data=True)
973
  ```
974
+ </details>
975
+
976
  ### Data Included in Export
977
 
978
  All exports include:
env.example CHANGED
@@ -3,9 +3,11 @@
3
  ### Server Configuration
4
  # HOST=0.0.0.0
5
  # PORT=9621
6
- # WORKERS=1
7
- # NAMESPACE_PREFIX=lightrag # separating data from difference Lightrag instances
8
- # MAX_GRAPH_NODES=1000 # Max nodes return from grap retrieval
 
 
9
  # CORS_ORIGINS=http://localhost:3000,http://localhost:8080
10
 
11
  ### Optional SSL Configuration
@@ -13,7 +15,7 @@
13
  # SSL_CERTFILE=/path/to/cert.pem
14
  # SSL_KEYFILE=/path/to/key.pem
15
 
16
- ### Directory Configuration
17
  # WORKING_DIR=<absolute_path_for_working_dir>
18
  # INPUT_DIR=<absolute_path_for_doc_input_dir>
19
 
@@ -23,9 +25,10 @@
23
  ### Logging level
24
  # LOG_LEVEL=INFO
25
  # VERBOSE=False
26
- # LOG_DIR=/path/to/log/directory # Log file directory path, defaults to current working directory
27
- # LOG_MAX_BYTES=10485760 # Log file max size in bytes, defaults to 10MB
28
- # LOG_BACKUP_COUNT=5 # Number of backup files to keep, defaults to 5
 
29
 
30
  ### Settings for RAG query
31
  # HISTORY_TURNS=3
@@ -36,28 +39,37 @@
36
  # MAX_TOKEN_ENTITY_DESC=4000
37
 
38
  ### Settings for document indexing
39
- ENABLE_LLM_CACHE_FOR_EXTRACT=true # Enable LLM cache for entity extraction
40
  SUMMARY_LANGUAGE=English
41
  # CHUNK_SIZE=1200
42
  # CHUNK_OVERLAP_SIZE=100
43
- # MAX_TOKEN_SUMMARY=500 # Max tokens for entity or relations summary
44
- # MAX_PARALLEL_INSERT=2 # Number of parallel processing documents in one patch
45
-
46
- # EMBEDDING_BATCH_NUM=32 # num of chunks send to Embedding in one request
47
- # EMBEDDING_FUNC_MAX_ASYNC=16 # Max concurrency requests for Embedding
 
 
 
 
48
  # MAX_EMBED_TOKENS=8192
49
 
50
- ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
51
- TIMEOUT=150 # Time out in seconds for LLM, None for infinite timeout
 
 
52
  TEMPERATURE=0.5
53
- MAX_ASYNC=4 # Max concurrency requests of LLM
54
- MAX_TOKENS=32768 # Max tokens send to LLM (less than context size of the model)
 
 
55
 
 
56
  LLM_BINDING=ollama
57
  LLM_MODEL=mistral-nemo:latest
58
  LLM_BINDING_API_KEY=your_api_key
59
- ### Ollama example
60
  LLM_BINDING_HOST=http://localhost:11434
 
61
### OpenAI-compatible example
62
  # LLM_BINDING=openai
63
  # LLM_MODEL=gpt-4o
@@ -103,9 +115,10 @@ ORACLE_DSN=localhost:1521/XEPDB1
103
  ORACLE_USER=your_username
104
  ORACLE_PASSWORD='your_password'
105
  ORACLE_CONFIG_DIR=/path/to/oracle/config
106
- #ORACLE_WALLET_LOCATION=/path/to/wallet # optional
107
- #ORACLE_WALLET_PASSWORD='your_password' # optional
108
- #ORACLE_WORKSPACE=default # separating all data from difference Lightrag instances(deprecated, use NAMESPACE_PREFIX in future)
 
109
 
110
  ### TiDB Configuration
111
  TIDB_HOST=localhost
@@ -113,7 +126,8 @@ TIDB_PORT=4000
113
  TIDB_USER=your_username
114
  TIDB_PASSWORD='your_password'
115
  TIDB_DATABASE=your_database
116
- #TIDB_WORKSPACE=default # separating all data from difference Lightrag instances(deprecated, use NAMESPACE_PREFIX in future)
 
117
 
118
  ### PostgreSQL Configuration
119
  POSTGRES_HOST=localhost
@@ -121,7 +135,8 @@ POSTGRES_PORT=5432
121
  POSTGRES_USER=your_username
122
  POSTGRES_PASSWORD='your_password'
123
  POSTGRES_DATABASE=your_database
124
- #POSTGRES_WORKSPACE=default # separating all data from difference Lightrag instances(deprecated, use NAMESPACE_PREFIX in future)
 
125
 
126
### Independent AGM Configuration (not for AGM embedded in PostgreSQL)
127
  AGE_POSTGRES_DB=
@@ -130,8 +145,9 @@ AGE_POSTGRES_PASSWORD=
130
  AGE_POSTGRES_HOST=
131
  # AGE_POSTGRES_PORT=8529
132
 
 
133
# AGE Graph Name (applies to PostgreSQL and independent AGM)
134
- # AGE_GRAPH_NAME=lightrag # deprecated, use NAME_SPACE_PREFIX instead
135
 
136
  ### Neo4j Configuration
137
  NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
@@ -141,7 +157,8 @@ NEO4J_PASSWORD='your_password'
141
  ### MongoDB Configuration
142
  MONGO_URI=mongodb://root:root@localhost:27017/
143
  MONGO_DATABASE=LightRAG
144
- MONGODB_GRAPH=false # deprecated (keep for backward compatibility)
 
145
 
146
  ### Milvus Configuration
147
  MILVUS_URI=http://localhost:19530
@@ -158,11 +175,11 @@ QDRANT_URL=http://localhost:16333
158
  REDIS_URI=redis://localhost:6379
159
 
160
  ### For JWT Auth
161
- AUTH_ACCOUNTS='admin:admin123,user1:pass456' # username:password,username:password
162
- TOKEN_SECRET=Your-Key-For-LightRAG-API-Server # JWT key
163
- #TOKEN_EXPIRE_HOURS=4 # Expire duration, default 4
164
- #GUEST_TOKEN_EXPIRE_HOURS=2 # Guest expire duration, default 2
165
- #JWT_ALGORITHM=HS256 # JWT encode algorithm, default HS256
166
 
167
  ### API-Key to access LightRAG Server API
168
  # LIGHTRAG_API_KEY=your-secure-api-key-here
 
3
  ### Server Configuration
4
  # HOST=0.0.0.0
5
  # PORT=9621
6
+ # WORKERS=2
7
+ ### Separates data from different LightRAG instances
8
+ # NAMESPACE_PREFIX=lightrag
9
+ ### Max nodes returned from graph retrieval
10
+ # MAX_GRAPH_NODES=1000
11
  # CORS_ORIGINS=http://localhost:3000,http://localhost:8080
12
 
13
  ### Optional SSL Configuration
 
15
  # SSL_CERTFILE=/path/to/cert.pem
16
  # SSL_KEYFILE=/path/to/key.pem
17
 
18
+ ### Directory Configuration (defaults to current working directory)
19
  # WORKING_DIR=<absolute_path_for_working_dir>
20
  # INPUT_DIR=<absolute_path_for_doc_input_dir>
21
 
 
25
  ### Logging level
26
  # LOG_LEVEL=INFO
27
  # VERBOSE=False
28
+ # LOG_MAX_BYTES=10485760
29
+ # LOG_BACKUP_COUNT=5
30
+ ### Logfile location (defaults to current working directory)
31
+ # LOG_DIR=/path/to/log/directory
32
 
33
  ### Settings for RAG query
34
  # HISTORY_TURNS=3
 
39
  # MAX_TOKEN_ENTITY_DESC=4000
40
 
41
  ### Settings for document indexing
42
+ ENABLE_LLM_CACHE_FOR_EXTRACT=true
43
  SUMMARY_LANGUAGE=English
44
  # CHUNK_SIZE=1200
45
  # CHUNK_OVERLAP_SIZE=100
46
+ ### Max tokens for entity or relation summaries
47
+ # MAX_TOKEN_SUMMARY=500
48
+ ### Number of documents processed in parallel in one batch
49
+ # MAX_PARALLEL_INSERT=2
50
+
51
+ ### Num of chunks sent to embedding in a single request
52
+ # EMBEDDING_BATCH_NUM=32
53
+ ### Max concurrent requests for embedding
54
+ # EMBEDDING_FUNC_MAX_ASYNC=16
55
  # MAX_EMBED_TOKENS=8192
56
 
57
+ ### LLM Configuration
58
+ ### Timeout in seconds for LLM, None for infinite timeout
59
+ TIMEOUT=150
60
+ ### Some models like o1-mini require temperature to be set to 1
61
  TEMPERATURE=0.5
62
+ ### Max concurrent requests to the LLM
63
+ MAX_ASYNC=4
64
+ ### Max tokens sent to LLM (less than the model's context size)
65
+ MAX_TOKENS=32768
66
 
67
+ ### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
68
  LLM_BINDING=ollama
69
  LLM_MODEL=mistral-nemo:latest
70
  LLM_BINDING_API_KEY=your_api_key
 
71
  LLM_BINDING_HOST=http://localhost:11434
72
+
73
### OpenAI-compatible example
74
  # LLM_BINDING=openai
75
  # LLM_MODEL=gpt-4o
 
115
  ORACLE_USER=your_username
116
  ORACLE_PASSWORD='your_password'
117
  ORACLE_CONFIG_DIR=/path/to/oracle/config
118
+ #ORACLE_WALLET_LOCATION=/path/to/wallet
119
+ #ORACLE_WALLET_PASSWORD='your_password'
120
+ ### Separates all data from different LightRAG instances (deprecated; use NAMESPACE_PREFIX in the future)
121
+ #ORACLE_WORKSPACE=default
122
 
123
  ### TiDB Configuration
124
  TIDB_HOST=localhost
 
126
  TIDB_USER=your_username
127
  TIDB_PASSWORD='your_password'
128
  TIDB_DATABASE=your_database
129
+ ### Separates all data from different LightRAG instances (deprecated; use NAMESPACE_PREFIX in the future)
130
+ #TIDB_WORKSPACE=default
131
 
132
  ### PostgreSQL Configuration
133
  POSTGRES_HOST=localhost
 
135
  POSTGRES_USER=your_username
136
  POSTGRES_PASSWORD='your_password'
137
  POSTGRES_DATABASE=your_database
138
+ ### Separates all data from different LightRAG instances (deprecated; use NAMESPACE_PREFIX in the future)
139
+ #POSTGRES_WORKSPACE=default
140
 
141
### Independent AGM Configuration (not for AGM embedded in PostgreSQL)
142
  AGE_POSTGRES_DB=
 
145
  AGE_POSTGRES_HOST=
146
  # AGE_POSTGRES_PORT=8529
147
 
148
+ ### Separates all data from different LightRAG instances (deprecated; use NAMESPACE_PREFIX in the future)
149
# AGE Graph Name (applies to PostgreSQL and independent AGM)
150
+ # AGE_GRAPH_NAME=lightrag
151
 
152
  ### Neo4j Configuration
153
  NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
 
157
  ### MongoDB Configuration
158
  MONGO_URI=mongodb://root:root@localhost:27017/
159
  MONGO_DATABASE=LightRAG
160
+ ### Deprecated (kept for backward compatibility)
161
+ # MONGODB_GRAPH=false
162
 
163
  ### Milvus Configuration
164
  MILVUS_URI=http://localhost:19530
 
175
  REDIS_URI=redis://localhost:6379
176
 
177
  ### For JWT Auth
178
+ #AUTH_ACCOUNTS='admin:admin123,user1:pass456'
179
+ #TOKEN_SECRET=Your-Key-For-LightRAG-API-Server
180
+ #TOKEN_EXPIRE_HOURS=4
181
+ #GUEST_TOKEN_EXPIRE_HOURS=2
182
+ #JWT_ALGORITHM=HS256
183
 
184
  ### API-Key to access LightRAG Server API
185
  # LIGHTRAG_API_KEY=your-secure-api-key-here
examples/lightrag_gemini_track_token_demo.py ADDED
@@ -0,0 +1,151 @@
1
+ # pip install -q -U google-genai to use gemini as a client
2
+
3
+ import os
4
+ import asyncio
5
+ import numpy as np
6
+ import nest_asyncio
7
+ from google import genai
8
+ from google.genai import types
9
+ from dotenv import load_dotenv
10
+ from lightrag.utils import EmbeddingFunc
11
+ from lightrag import LightRAG, QueryParam
12
+ from lightrag.kg.shared_storage import initialize_pipeline_status
13
+ from lightrag.llm.siliconcloud import siliconcloud_embedding
14
+ from lightrag.utils import setup_logger
15
+ from lightrag.utils import TokenTracker
16
+
17
+ setup_logger("lightrag", level="DEBUG")
18
+
19
+ # Apply nest_asyncio to solve event loop issues
20
+ nest_asyncio.apply()
21
+
22
+ load_dotenv()
23
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
24
+ siliconflow_api_key = os.getenv("SILICONFLOW_API_KEY")
25
+
26
+ WORKING_DIR = "./dickens"
27
+
28
+ if not os.path.exists(WORKING_DIR):
29
+ os.mkdir(WORKING_DIR)
30
+
31
+ token_tracker = TokenTracker()
32
+
33
+
34
+ async def llm_model_func(
35
+ prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
36
+ ) -> str:
37
+ # 1. Initialize the GenAI Client with your Gemini API Key
38
+ client = genai.Client(api_key=gemini_api_key)
39
+
40
+ # 2. Combine prompts: system prompt, history, and user prompt
41
+ if history_messages is None:
42
+ history_messages = []
43
+
44
+ combined_prompt = ""
45
+ if system_prompt:
46
+ combined_prompt += f"{system_prompt}\n"
47
+
48
+ for msg in history_messages:
49
+ # Each msg is expected to be a dict: {"role": "...", "content": "..."}
50
+ combined_prompt += f"{msg['role']}: {msg['content']}\n"
51
+
52
+ # Finally, add the new user prompt
53
+ combined_prompt += f"user: {prompt}"
54
+
55
+ # 3. Call the Gemini model
56
+ response = client.models.generate_content(
57
+ model="gemini-2.0-flash",
58
+ contents=[combined_prompt],
59
+ config=types.GenerateContentConfig(
60
+ max_output_tokens=5000, temperature=0, top_k=10
61
+ ),
62
+ )
63
+
64
+ # 4. Get token counts with null safety
65
+ usage = getattr(response, "usage_metadata", None)
66
+ prompt_tokens = getattr(usage, "prompt_token_count", 0) or 0
67
+ completion_tokens = getattr(usage, "candidates_token_count", 0) or 0
68
+ total_tokens = getattr(usage, "total_token_count", 0) or (
69
+ prompt_tokens + completion_tokens
70
+ )
71
+
72
+ token_counts = {
73
+ "prompt_tokens": prompt_tokens,
74
+ "completion_tokens": completion_tokens,
75
+ "total_tokens": total_tokens,
76
+ }
77
+
78
+ token_tracker.add_usage(token_counts)
79
+
80
+ # 5. Return the response text
81
+ return response.text
82
+
83
+
84
+ async def embedding_func(texts: list[str]) -> np.ndarray:
85
+ return await siliconcloud_embedding(
86
+ texts,
87
+ model="BAAI/bge-m3",
88
+ api_key=siliconflow_api_key,
89
+ max_token_size=512,
90
+ )
91
+
92
+
93
+ async def initialize_rag():
94
+ rag = LightRAG(
95
+ working_dir=WORKING_DIR,
96
+ entity_extract_max_gleaning=1,
97
+ enable_llm_cache=True,
98
+ enable_llm_cache_for_entity_extract=True,
99
+ embedding_cache_config={"enabled": True, "similarity_threshold": 0.90},
100
+ llm_model_func=llm_model_func,
101
+ embedding_func=EmbeddingFunc(
102
+ embedding_dim=1024,
103
+ max_token_size=8192,
104
+ func=embedding_func,
105
+ ),
106
+ )
107
+
108
+ await rag.initialize_storages()
109
+ await initialize_pipeline_status()
110
+
111
+ return rag
112
+
113
+
114
+ def main():
115
+ # Initialize RAG instance
116
+ rag = asyncio.run(initialize_rag())
117
+
118
+ with open("./book.txt", "r", encoding="utf-8") as f:
119
+ rag.insert(f.read())
120
+
121
+ # Context Manager Method
122
+ with token_tracker:
123
+ print(
124
+ rag.query(
125
+ "What are the top themes in this story?", param=QueryParam(mode="naive")
126
+ )
127
+ )
128
+
129
+ print(
130
+ rag.query(
131
+ "What are the top themes in this story?", param=QueryParam(mode="local")
132
+ )
133
+ )
134
+
135
+ print(
136
+ rag.query(
137
+ "What are the top themes in this story?",
138
+ param=QueryParam(mode="global"),
139
+ )
140
+ )
141
+
142
+ print(
143
+ rag.query(
144
+ "What are the top themes in this story?",
145
+ param=QueryParam(mode="hybrid"),
146
+ )
147
+ )
148
+
149
+
150
+ if __name__ == "__main__":
151
+ main()
examples/lightrag_siliconcloud_track_token_demo.py ADDED
@@ -0,0 +1,110 @@
1
+ import os
2
+ import asyncio
3
+ from lightrag import LightRAG, QueryParam
4
+ from lightrag.llm.openai import openai_complete_if_cache
5
+ from lightrag.llm.siliconcloud import siliconcloud_embedding
6
+ from lightrag.utils import EmbeddingFunc
7
+ from lightrag.utils import TokenTracker
8
+ import numpy as np
9
+ from lightrag.kg.shared_storage import initialize_pipeline_status
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+
14
+ token_tracker = TokenTracker()
15
+ WORKING_DIR = "./dickens"
16
+
17
+ if not os.path.exists(WORKING_DIR):
18
+ os.mkdir(WORKING_DIR)
19
+
20
+
21
+ async def llm_model_func(
22
+ prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
23
+ ) -> str:
24
+ return await openai_complete_if_cache(
25
+ "Qwen/Qwen2.5-7B-Instruct",
26
+ prompt,
27
+ system_prompt=system_prompt,
28
+ history_messages=history_messages,
29
+ api_key=os.getenv("SILICONFLOW_API_KEY"),
30
+ base_url="https://api.siliconflow.cn/v1/",
31
+ token_tracker=token_tracker,
32
+ **kwargs,
33
+ )
34
+
35
+
36
+ async def embedding_func(texts: list[str]) -> np.ndarray:
37
+ return await siliconcloud_embedding(
38
+ texts,
39
+ model="BAAI/bge-m3",
40
+ api_key=os.getenv("SILICONFLOW_API_KEY"),
41
+ max_token_size=512,
42
+ )
43
+
44
+
45
+ # function test
46
+ async def test_funcs():
47
+ # Context Manager Method
48
+ with token_tracker:
49
+ result = await llm_model_func("How are you?")
50
+ print("llm_model_func: ", result)
51
+
52
+
53
+ asyncio.run(test_funcs())
54
+
55
+
56
+ async def initialize_rag():
57
+ rag = LightRAG(
58
+ working_dir=WORKING_DIR,
59
+ llm_model_func=llm_model_func,
60
+ embedding_func=EmbeddingFunc(
61
+ embedding_dim=1024, max_token_size=512, func=embedding_func
62
+ ),
63
+ )
64
+
65
+ await rag.initialize_storages()
66
+ await initialize_pipeline_status()
67
+
68
+ return rag
69
+
70
+
71
+ def main():
72
+ # Initialize RAG instance
73
+ rag = asyncio.run(initialize_rag())
74
+
75
+ # Reset tracker before processing queries
76
+ token_tracker.reset()
77
+
78
+ with open("./book.txt", "r", encoding="utf-8") as f:
79
+ rag.insert(f.read())
80
+
81
+ print(
82
+ rag.query(
83
+ "What are the top themes in this story?", param=QueryParam(mode="naive")
84
+ )
85
+ )
86
+
87
+ print(
88
+ rag.query(
89
+ "What are the top themes in this story?", param=QueryParam(mode="local")
90
+ )
91
+ )
92
+
93
+ print(
94
+ rag.query(
95
+ "What are the top themes in this story?", param=QueryParam(mode="global")
96
+ )
97
+ )
98
+
99
+ print(
100
+ rag.query(
101
+ "What are the top themes in this story?", param=QueryParam(mode="hybrid")
102
+ )
103
+ )
104
+
105
+ # Display final token usage after main query
106
+ print("Token usage:", token_tracker.get_usage())
107
+
108
+
109
+ if __name__ == "__main__":
110
+ main()
lightrag/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
2
 
3
- __version__ = "1.3.0"
4
  __author__ = "Zirui Guo"
5
  __url__ = "https://github.com/HKUDS/LightRAG"
 
1
  from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
2
 
3
+ __version__ = "1.3.1"
4
  __author__ = "Zirui Guo"
5
  __url__ = "https://github.com/HKUDS/LightRAG"
lightrag/api/README-zh.md CHANGED
@@ -52,7 +52,8 @@ LLM_BINDING=openai
52
  LLM_MODEL=gpt-4o
53
  LLM_BINDING_HOST=https://api.openai.com/v1
54
  LLM_BINDING_API_KEY=your_api_key
55
- MAX_TOKENS=32768 # 发送给 LLM 的最大 token 数(小于模型上下文大小)
 
56
 
57
  EMBEDDING_BINDING=ollama
58
  EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -68,7 +69,8 @@ LLM_BINDING=ollama
68
  LLM_MODEL=mistral-nemo:latest
69
  LLM_BINDING_HOST=http://localhost:11434
70
  # LLM_BINDING_API_KEY=your_api_key
71
- MAX_TOKENS=8192 # 发送给 LLM 的最大 token 数(基于您的 Ollama 服务器容量)
 
72
 
73
  EMBEDDING_BINDING=ollama
74
  EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -117,9 +119,12 @@ LightRAG 服务器可以在 `Gunicorn + Uvicorn` 预加载模式下运行。Guni
117
Though the LightRAG Server uses one worker to process the document indexing pipeline, with the async task support of Uvicorn, multiple files can be processed in parallel. The bottleneck of document indexing speed mainly lies with the LLM. If your LLM supports high concurrency, you can accelerate document indexing by increasing the concurrency level of the LLM. Below are several environment variables related to concurrent processing, along with their default values:
118
 
119
  ```
120
- WORKERS=2 # 工作进程数,不大于 (2 x 核心数) + 1
121
- MAX_PARALLEL_INSERT=2 # 一批中并行处理的文件数
122
- MAX_ASYNC=4 # LLM 的最大并发请求数
 
 
 
123
  ```
124
 
125
### Install Lightrag as a Linux Service
@@ -201,10 +206,9 @@ LightRAG API 服务器使用基于 HS256 算法的 JWT 认证。要启用安全
201
 
202
  ```bash
203
# JWT authentication
204
- AUTH_USERNAME=admin # 登录名
205
- AUTH_PASSWORD=admin123 # 密码
206
- TOKEN_SECRET=your-key # JWT 密钥
207
- TOKEN_EXPIRE_HOURS=4 # 过期时间
208
  ```
209
 
210
> Currently, only the configuration of an administrator account and password is supported. A comprehensive account system is yet to be developed and implemented.
@@ -238,8 +242,11 @@ LLM_BINDING=azure_openai
238
  LLM_BINDING_HOST=your-azure-endpoint
239
  LLM_MODEL=your-model-deployment-name
240
  LLM_BINDING_API_KEY=your-azure-api-key
241
- AZURE_OPENAI_API_VERSION=2024-08-01-preview # 可选,默认为最新版本
242
- EMBEDDING_BINDING=azure_openai # 如果使用 Azure OpenAI 进行嵌入
 
 
 
243
  EMBEDDING_MODEL=your-embedding-deployment-name
244
  ```
245
 
@@ -362,7 +369,47 @@ LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
362
| --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
363
| auto-scan-at-startup | - | Scan input directory for new files and start indexing |
364
 
365
- ### 使用示例
 
366
 
367
#### Running a Lightrag server with ollama default local server as llm and embedding backends
368
 
 
52
  LLM_MODEL=gpt-4o
53
  LLM_BINDING_HOST=https://api.openai.com/v1
54
  LLM_BINDING_API_KEY=your_api_key
55
+ ### Max tokens sent to the LLM (less than the model's context size)
56
+ MAX_TOKENS=32768
57
 
58
  EMBEDDING_BINDING=ollama
59
  EMBEDDING_BINDING_HOST=http://localhost:11434
 
69
  LLM_MODEL=mistral-nemo:latest
70
  LLM_BINDING_HOST=http://localhost:11434
71
  # LLM_BINDING_API_KEY=your_api_key
72
+ ### Max tokens sent to the LLM (based on your Ollama server capacity)
73
+ MAX_TOKENS=8192
74
 
75
  EMBEDDING_BINDING=ollama
76
  EMBEDDING_BINDING_HOST=http://localhost:11434
 
119
Though the LightRAG Server uses one worker to process the document indexing pipeline, with the async task support of Uvicorn, multiple files can be processed in parallel. The bottleneck of document indexing speed mainly lies with the LLM. If your LLM supports high concurrency, you can accelerate document indexing by increasing the concurrency level of the LLM. Below are several environment variables related to concurrent processing, along with their default values:
120
 
121
  ```
122
+ ### Num of worker processes, not greater than (2 x number_of_cores) + 1
123
+ WORKERS=2
124
+ ### Num of parallel files to process in one batch
125
+ MAX_PARALLEL_INSERT=2
126
+ # Max concurrent requests to the LLM
127
+ MAX_ASYNC=4
128
  ```
129
 
130
### Install Lightrag as a Linux Service
 
206
 
207
  ```bash
208
# JWT authentication
209
+ AUTH_ACCOUNTS='admin:admin123,user1:pass456'
210
+ TOKEN_SECRET='your-key'
211
+ TOKEN_EXPIRE_HOURS=4
 
212
  ```
213
 
214
> Currently, only the configuration of an administrator account and password is supported. A comprehensive account system is yet to be developed and implemented.
 
242
  LLM_BINDING_HOST=your-azure-endpoint
243
  LLM_MODEL=your-model-deployment-name
244
  LLM_BINDING_API_KEY=your-azure-api-key
245
+ ### API version is optional, defaults to latest version
246
+ AZURE_OPENAI_API_VERSION=2024-08-01-preview
247
+
248
+ ### If using Azure OpenAI for embeddings
249
+ EMBEDDING_BINDING=azure_openai
250
  EMBEDDING_MODEL=your-embedding-deployment-name
251
  ```
252
 
 
369
| --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
370
| auto-scan-at-startup | - | Scan input directory for new files and start indexing |
371
 
372
+ ### Example .env file
373
+
374
+ ```bash
375
+ ### Server Configuration
376
+ # HOST=0.0.0.0
377
+ PORT=9621
378
+ WORKERS=2
379
+
380
+ ### Settings for document indexing
381
+ ENABLE_LLM_CACHE_FOR_EXTRACT=true
382
+ SUMMARY_LANGUAGE=Chinese
383
+ MAX_PARALLEL_INSERT=2
384
+
385
+ ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
386
+ TIMEOUT=200
387
+ TEMPERATURE=0.0
388
+ MAX_ASYNC=4
389
+ MAX_TOKENS=32768
390
+
391
+ LLM_BINDING=openai
392
+ LLM_MODEL=gpt-4o-mini
393
+ LLM_BINDING_HOST=https://api.openai.com/v1
394
+ LLM_BINDING_API_KEY=your-api-key
395
+
396
+ ### Embedding Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
397
+ EMBEDDING_MODEL=bge-m3:latest
398
+ EMBEDDING_DIM=1024
399
+ EMBEDDING_BINDING=ollama
400
+ EMBEDDING_BINDING_HOST=http://localhost:11434
401
+
402
+ ### For JWT Auth
403
+ # AUTH_ACCOUNTS='admin:admin123,user1:pass456'
404
+ # TOKEN_SECRET=your-key-for-LightRAG-API-Server-xxx
405
+ # TOKEN_EXPIRE_HOURS=48
406
+
407
+ # LIGHTRAG_API_KEY=your-secure-api-key-here-123
408
+ # WHITELIST_PATHS=/api/*
409
+ # WHITELIST_PATHS=/health,/api/*
410
+ ```
411
+
412
+
413
 
414
#### Running a Lightrag server with ollama default local server as llm and embedding backends
415
 
lightrag/api/README.md CHANGED
@@ -52,7 +52,8 @@ LLM_BINDING=openai
52
  LLM_MODEL=gpt-4o
53
  LLM_BINDING_HOST=https://api.openai.com/v1
54
  LLM_BINDING_API_KEY=your_api_key
55
- MAX_TOKENS=32768 # Max tokens send to LLM (less than model context size)
 
56
 
57
  EMBEDDING_BINDING=ollama
58
  EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -68,7 +69,8 @@ LLM_BINDING=ollama
68
  LLM_MODEL=mistral-nemo:latest
69
  LLM_BINDING_HOST=http://localhost:11434
70
  # LLM_BINDING_API_KEY=your_api_key
71
- MAX_TOKENS=8192 # Max tokens send to LLM (base on your Ollama Server capacity)
 
72
 
73
  EMBEDDING_BINDING=ollama
74
  EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -90,7 +92,9 @@ lightrag-server
90
  ```
91
  lightrag-gunicorn --workers 4
92
  ```
93
- The `.env` file must be placed in the startup directory. Upon launching, the LightRAG Server will create a documents directory (default is `./inputs`) and a data directory (default is `./rag_storage`). This allows you to initiate multiple instances of LightRAG Server from different directories, with each instance configured to listen on a distinct network port.
 
 
94
 
95
Here are some commonly used startup parameters:
96
 
@@ -100,6 +104,8 @@ Here are some common used startup parameters:
100
  - `--log-level`: Logging level (default: INFO)
101
- `--input-dir`: specify the directory to scan for documents (default: `./input`)
102
 
 
 
103
  ### Auto scan on startup
104
 
105
  When starting any of the servers with the `--auto-scan-at-startup` parameter, the system will automatically:
@@ -117,9 +123,12 @@ The LightRAG Server can operate in the `Gunicorn + Uvicorn` preload mode. Gunico
117
Though the LightRAG Server uses one worker to process the document indexing pipeline, with the async task support of Uvicorn, multiple files can be processed in parallel. The bottleneck of document indexing speed mainly lies with the LLM. If your LLM supports high concurrency, you can accelerate document indexing by increasing the concurrency level of the LLM. Below are several environment variables related to concurrent processing, along with their default values:
118
 
119
  ```
120
- WORKERS=2 # Num of worker processes, not greater then (2 x number_of_cores) + 1
121
- MAX_PARALLEL_INSERT=2 # Num of parallel files to process in one batch
122
- MAX_ASYNC=4 # Max concurrency requests of LLM
 
 
 
123
  ```
124
 
125
  ### Install Lightrag as a Linux Service
@@ -203,10 +212,9 @@ LightRAG API Server implements JWT-based authentication using HS256 algorithm. T
203
 
204
  ```bash
205
  # For jwt auth
206
- AUTH_USERNAME=admin # login name
207
- AUTH_PASSWORD=admin123 # password
208
- TOKEN_SECRET=your-key # JWT key
209
- TOKEN_EXPIRE_HOURS=4 # expire duration
210
  ```
211
 
212
  > Currently, only the configuration of an administrator account and password is supported. A comprehensive account system is yet to be developed and implemented.
@@ -243,10 +251,12 @@ LLM_BINDING=azure_openai
243
  LLM_BINDING_HOST=your-azure-endpoint
244
  LLM_MODEL=your-model-deployment-name
245
  LLM_BINDING_API_KEY=your-azure-api-key
246
- AZURE_OPENAI_API_VERSION=2024-08-01-preview # optional, defaults to latest version
247
- EMBEDDING_BINDING=azure_openai # if using Azure OpenAI for embeddings
248
- EMBEDDING_MODEL=your-embedding-deployment-name
249
 
 
 
 
250
  ```
251
 
252
 
@@ -370,76 +380,47 @@ You can not change storage implementation selection after you add documents to L
370
  | --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
371
  | auto-scan-at-startup | - | Scan input directory for new files and start indexing |
372
 
373
- ### Example Usage
374
-
375
- #### Running a Lightrag server with ollama default local server as llm and embedding backends
376
-
377
- Ollama is the default backend for both llm and embedding, so by default you can run lightrag-server with no parameters and the default ones will be used. Make sure ollama is installed and is running and default models are already installed on ollama.
378
 
379
  ```bash
380
- # Run lightrag with ollama, mistral-nemo:latest for llm, and bge-m3:latest for embedding
381
- lightrag-server
382
-
383
- # Using an authentication key
384
- lightrag-server --key my-key
385
- ```
386
 
387
- #### Running a Lightrag server with lollms default local server as llm and embedding backends
388
-
389
- ```bash
390
- # Run lightrag with lollms, mistral-nemo:latest for llm, and bge-m3:latest for embedding
391
- # Configure LLM_BINDING=lollms and EMBEDDING_BINDING=lollms in .env or config.ini
392
- lightrag-server
393
-
394
- # Using an authentication key
395
- lightrag-server --key my-key
396
- ```
397
-
398
- #### Running a Lightrag server with openai server as llm and embedding backends
399
-
400
- ```bash
401
- # Run lightrag with openai, GPT-4o-mini for llm, and text-embedding-3-small for embedding
402
- # Configure in .env or config.ini:
403
- # LLM_BINDING=openai
404
- # LLM_MODEL=GPT-4o-mini
405
- # EMBEDDING_BINDING=openai
406
- # EMBEDDING_MODEL=text-embedding-3-small
407
- lightrag-server
408
-
409
- # Using an authentication key
410
- lightrag-server --key my-key
411
- ```
412
-
413
- #### Running a Lightrag server with azure openai server as llm and embedding backends
414
 
415
- ```bash
416
- # Run lightrag with azure_openai
417
- # Configure in .env or config.ini:
418
- # LLM_BINDING=azure_openai
419
- # LLM_MODEL=your-model
420
- # EMBEDDING_BINDING=azure_openai
421
- # EMBEDDING_MODEL=your-embedding-model
422
- lightrag-server
423
 
424
- # Using an authentication key
425
- lightrag-server --key my-key
426
- ```
 
427
 
428
- **Important Notes:**
429
- - For LoLLMs: Make sure the specified models are installed in your LoLLMs instance
430
- - For Ollama: Make sure the specified models are installed in your Ollama instance
431
- - For OpenAI: Ensure you have set up your OPENAI_API_KEY environment variable
432
- - For Azure OpenAI: Build and configure your server as stated in the Prequisites section
433
 
434
- For help on any server, use the --help flag:
435
- ```bash
436
- lightrag-server --help
437
  ```
438
 
439
- Note: If you don't need the API functionality, you can install the base package without API support using:
440
- ```bash
441
- pip install lightrag-hku
442
- ```
443
 
444
  ## API Endpoints
445
 
 
52
  LLM_MODEL=gpt-4o
53
  LLM_BINDING_HOST=https://api.openai.com/v1
54
  LLM_BINDING_API_KEY=your_api_key
55
+ ### Max tokens sent to LLM (less than model context size)
56
+ MAX_TOKENS=32768
57
 
58
  EMBEDDING_BINDING=ollama
59
  EMBEDDING_BINDING_HOST=http://localhost:11434
 
69
  LLM_MODEL=mistral-nemo:latest
70
  LLM_BINDING_HOST=http://localhost:11434
71
  # LLM_BINDING_API_KEY=your_api_key
72
+ ### Max tokens sent to LLM (based on your Ollama server capacity)
73
+ MAX_TOKENS=8192
74
 
75
  EMBEDDING_BINDING=ollama
76
  EMBEDDING_BINDING_HOST=http://localhost:11434
 
92
  ```
93
  lightrag-gunicorn --workers 4
94
  ```
95
+ The `.env` file **must be placed in the startup directory**.
96
+
97
+ Upon launching, the LightRAG Server will create a documents directory (default is `./inputs`) and a data directory (default is `./rag_storage`). This allows you to initiate multiple instances of LightRAG Server from different directories, with each instance configured to listen on a distinct network port.
98
 
99
  Here are some common used startup parameters:
100
 
 
104
  - `--log-level`: Logging level (default: INFO)
105
- `--input-dir`: specify the directory to scan for documents (default: `./input`)
106
 
107
+ > The requirement that the .env file live in the startup directory is intentional: it lets users launch multiple LightRAG instances simultaneously, with a different .env file for each instance.
108
+
109
  ### Auto scan on startup
110
 
111
  When starting any of the servers with the `--auto-scan-at-startup` parameter, the system will automatically:
 
123
Though the LightRAG Server uses one worker to process the document indexing pipeline, with the async task support of Uvicorn, multiple files can be processed in parallel. The bottleneck of document indexing speed mainly lies with the LLM. If your LLM supports high concurrency, you can accelerate document indexing by increasing the concurrency level of the LLM. Below are several environment variables related to concurrent processing, along with their default values:
124
 
125
  ```
126
+ ### Num of worker processes, not greater than (2 x number_of_cores) + 1
127
+ WORKERS=2
128
+ ### Num of parallel files to process in one batch
129
+ MAX_PARALLEL_INSERT=2
130
+ ### Max concurrent requests to the LLM
131
+ MAX_ASYNC=4
132
  ```
133
 
134
  ### Install Lightrag as a Linux Service
 
212
 
213
  ```bash
214
  # For jwt auth
215
+ AUTH_ACCOUNTS='admin:admin123,user1:pass456'
216
+ TOKEN_SECRET='your-key'
217
+ TOKEN_EXPIRE_HOURS=4
 
218
  ```
219
 
220
  > Currently, only the configuration of an administrator account and password is supported. A comprehensive account system is yet to be developed and implemented.
 
251
  LLM_BINDING_HOST=your-azure-endpoint
252
  LLM_MODEL=your-model-deployment-name
253
  LLM_BINDING_API_KEY=your-azure-api-key
254
+ ### API version is optional, defaults to latest version
255
+ AZURE_OPENAI_API_VERSION=2024-08-01-preview
 
256
 
257
+ ### if using Azure OpenAI for embeddings
258
+ EMBEDDING_BINDING=azure_openai
259
+ EMBEDDING_MODEL=your-embedding-deployment-name
260
  ```
261
 
262
 
 
380
  | --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
381
  | auto-scan-at-startup | - | Scan input directory for new files and start indexing |
382
 
383
+ ### .env Examples
 
 
385
  ```bash
386
+ ### Server Configuration
387
+ # HOST=0.0.0.0
388
+ PORT=9621
389
+ WORKERS=2
390
+
391
+ ### Settings for document indexing
392
+ ENABLE_LLM_CACHE_FOR_EXTRACT=true
393
+ SUMMARY_LANGUAGE=Chinese
394
+ MAX_PARALLEL_INSERT=2
395
+
396
+ ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
397
+ TIMEOUT=200
398
+ TEMPERATURE=0.0
399
+ MAX_ASYNC=4
400
+ MAX_TOKENS=32768
401
 
402
+ LLM_BINDING=openai
403
+ LLM_MODEL=gpt-4o-mini
404
+ LLM_BINDING_HOST=https://api.openai.com/v1
405
+ LLM_BINDING_API_KEY=your-api-key
 
406
 
407
+ ### Embedding Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
408
+ EMBEDDING_MODEL=bge-m3:latest
409
+ EMBEDDING_DIM=1024
410
+ EMBEDDING_BINDING=ollama
411
+ EMBEDDING_BINDING_HOST=http://localhost:11434
 
 
 
412
 
413
+ ### For JWT Auth
414
+ # AUTH_ACCOUNTS='admin:admin123,user1:pass456'
415
+ # TOKEN_SECRET=your-key-for-LightRAG-API-Server-xxx
416
+ # TOKEN_EXPIRE_HOURS=48
417
 
418
+ # LIGHTRAG_API_KEY=your-secure-api-key-here-123
419
+ # WHITELIST_PATHS=/api/*
420
+ # WHITELIST_PATHS=/health,/api/*
 
 
421
 
 
 
 
422
  ```
423
 
 
425
  ## API Endpoints
426
 
lightrag/api/__init__.py CHANGED
@@ -1 +1 @@
1
- __api_version__ = "1.2.7"
 
1
+ __api_version__ = "1.2.8"
lightrag/api/auth.py CHANGED
@@ -7,7 +7,10 @@ from pydantic import BaseModel
7
 
8
  from .config import global_args
9
 
10
- load_dotenv()
 
 
 
11
 
12
 
13
  class TokenPayload(BaseModel):
 
7
 
8
  from .config import global_args
9
 
10
+ # use the .env that is inside the current folder
11
+ # allows to use different .env file for each lightrag instance
12
+ # the OS environment variables take precedence over the .env file
13
+ load_dotenv(dotenv_path=".env", override=False)
14
 
15
 
16
  class TokenPayload(BaseModel):
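A minimal sketch of the precedence rule these comments describe (assuming a `.env` file in the current directory that sets `PORT=9621`):

```python
import os
from dotenv import load_dotenv

os.environ["PORT"] = "8000"  # value set by the OS / shell

# With override=False, variables already present in os.environ are
# NOT replaced by values read from the .env file
load_dotenv(dotenv_path=".env", override=False)

print(os.environ["PORT"])  # -> "8000": the OS value wins over .env
```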
lightrag/api/lightrag_server.py CHANGED
@@ -50,15 +50,18 @@ from lightrag.kg.shared_storage import (
50
  from fastapi.security import OAuth2PasswordRequestForm
51
  from lightrag.api.auth import auth_handler
52
 
53
- # Load environment variables
54
- # Updated to use the .env that is inside the current folder
55
- # This update allows the user to put a different.env file for each lightrag folder
56
- load_dotenv(".env")
57
 
58
  # Initialize config parser
59
  config = configparser.ConfigParser()
60
  config.read("config.ini")
61
 
 
 
 
62
 
63
  def create_app(args):
64
  # Setup logging
@@ -429,9 +432,7 @@ def create_app(args):
429
  try:
430
  pipeline_status = await get_namespace_data("pipeline_status")
431
 
432
- username = os.getenv("AUTH_USERNAME")
433
- password = os.getenv("AUTH_PASSWORD")
434
- if not (username and password):
435
  auth_mode = "disabled"
436
  else:
437
  auth_mode = "enabled"
 
50
  from fastapi.security import OAuth2PasswordRequestForm
51
  from lightrag.api.auth import auth_handler
52
 
53
+ # use the .env that is inside the current folder
54
+ # allows to use different .env file for each lightrag instance
55
+ # the OS environment variables take precedence over the .env file
56
+ load_dotenv(dotenv_path=".env", override=False)
57
 
58
  # Initialize config parser
59
  config = configparser.ConfigParser()
60
  config.read("config.ini")
61
 
62
+ # Global authentication configuration
63
+ auth_configured = bool(auth_handler.accounts)
64
+
65
 
66
  def create_app(args):
67
  # Setup logging
 
432
  try:
433
  pipeline_status = await get_namespace_data("pipeline_status")
434
 
435
+ if not auth_configured:
 
 
436
  auth_mode = "disabled"
437
  else:
438
  auth_mode = "enabled"
lightrag/api/routers/document_routes.py CHANGED
@@ -540,6 +540,7 @@ def create_document_routes(
540
 
541
  Returns:
542
  InsertResponse: A response object containing the upload status and a message.
 
543
 
544
  Raises:
545
  HTTPException: If the file type is not supported (400) or other errors occur (500).
@@ -552,6 +553,13 @@ def create_document_routes(
552
  )
553
 
554
  file_path = doc_manager.input_dir / file.filename
 
 
 
 
 
 
 
555
  with open(file_path, "wb") as buffer:
556
  shutil.copyfileobj(file.file, buffer)
557
 
 
540
 
541
  Returns:
542
  InsertResponse: A response object containing the upload status and a message.
543
+ status can be "success" or "duplicated"; otherwise an error is raised.
544
 
545
  Raises:
546
  HTTPException: If the file type is not supported (400) or other errors occur (500).
 
553
  )
554
 
555
  file_path = doc_manager.input_dir / file.filename
556
+ # Check if file already exists
557
+ if file_path.exists():
558
+ return InsertResponse(
559
+ status="duplicated",
560
+ message=f"File '{file.filename}' already exists in the input directory.",
561
+ )
562
+
563
  with open(file_path, "wb") as buffer:
564
  shutil.copyfileobj(file.file, buffer)
565
 
lightrag/api/run_with_gunicorn.py CHANGED
@@ -11,9 +11,10 @@ from lightrag.api.utils_api import parse_args, display_splash_screen, check_env_
11
  from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
12
  from dotenv import load_dotenv
13
 
14
- # Updated to use the .env that is inside the current folder
15
- # This update allows the user to put a different.env file for each lightrag folder
16
- load_dotenv(".env")
 
17
 
18
 
19
  def check_and_install_dependencies():
 
11
  from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
12
  from dotenv import load_dotenv
13
 
14
+ # use the .env that is inside the current folder
15
+ # allows to use different .env file for each lightrag instance
16
+ # the OS environment variables take precedence over the .env file
17
+ load_dotenv(dotenv_path=".env", override=False)
18
 
19
 
20
  def check_and_install_dependencies():
lightrag/api/utils_api.py CHANGED
@@ -2,18 +2,17 @@
2
  Utility functions for the LightRAG API.
3
  """
4
 
5
- import argparse
6
  import os
7
- import sys
8
  from typing import Optional, List, Tuple
9
-
10
  from ascii_colors import ASCIIColors
11
- from dotenv import load_dotenv
 
12
  from fastapi import HTTPException, Security, Request, status
 
13
  from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
14
  from starlette.status import HTTP_403_FORBIDDEN
15
-
16
- from lightrag.api import __api_version__
17
  from .auth import auth_handler
18
  from .config import ollama_server_infos
19
  from ..prompt import PROMPTS
@@ -25,9 +24,7 @@ def check_env_file():
25
  Returns True if should continue, False if should exit.
26
  """
27
  if not os.path.exists(".env"):
28
- warning_msg = (
29
- "Warning: .env file not found. Some features may not work properly."
30
- )
31
  ASCIIColors.yellow(warning_msg)
32
 
33
  # Check if running in interactive terminal
@@ -39,8 +36,10 @@ def check_env_file():
39
  return True
40
 
41
 
42
- # Load environment variables
43
- load_dotenv()
 
 
44
 
45
  # Get whitelist paths from environment variable, only once during initialization
46
  default_whitelist = "/health,/api/*"
@@ -182,7 +181,7 @@ def display_splash_screen(args: argparse.Namespace) -> None:
182
  # Banner
183
  ASCIIColors.cyan(f"""
184
  ╔══════════════════════════════════════════════════════════════╗
185
- ║ 🚀 LightRAG Server v{__api_version__}
186
  ║ Fast, Lightweight RAG Server Implementation ║
187
  ╚══════════════════════════════════════════════════════════════╝
188
  """)
 
2
  Utility functions for the LightRAG API.
3
  """
4
 
 
5
  import os
6
+ import argparse
7
  from typing import Optional, List, Tuple
8
+ import sys
9
  from ascii_colors import ASCIIColors
10
+ from lightrag.api import __api_version__ as api_version
11
+ from lightrag import __version__ as core_version
12
  from fastapi import HTTPException, Security, Request, status
13
+ from dotenv import load_dotenv
14
  from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
15
  from starlette.status import HTTP_403_FORBIDDEN
 
 
16
  from .auth import auth_handler
17
  from .config import ollama_server_infos
18
  from ..prompt import PROMPTS
 
24
  Returns True if should continue, False if should exit.
25
  """
26
  if not os.path.exists(".env"):
27
+ warning_msg = "Warning: Startup directory must contain .env file for multi-instance support."
 
 
28
  ASCIIColors.yellow(warning_msg)
29
 
30
  # Check if running in interactive terminal
 
36
  return True
37
 
38
 
39
+ # use the .env that is inside the current folder
40
+ # allows to use different .env file for each lightrag instance
41
+ # the OS environment variables take precedence over the .env file
42
+ load_dotenv(dotenv_path=".env", override=False)
43
 
44
  # Get whitelist paths from environment variable, only once during initialization
45
  default_whitelist = "/health,/api/*"
 
181
  # Banner
182
  ASCIIColors.cyan(f"""
183
  ╔══════════════════════════════════════════════════════════════╗
184
+ ║ 🚀 LightRAG Server v{core_version}/{api_version}
185
  ║ Fast, Lightweight RAG Server Implementation ║
186
  ╚══════════════════════════════════════════════════════════════╝
187
  """)
lightrag/api/webui/assets/index-CD5HxTy1.css ADDED
Binary file (55.1 kB). View file
 
lightrag/api/webui/assets/index-CbzkrOyx.css DELETED
Binary file (54.9 kB)
 
lightrag/api/webui/assets/{index-DTDDxtXc.js → index-raheqJeu.js} RENAMED
Binary files a/lightrag/api/webui/assets/index-DTDDxtXc.js and b/lightrag/api/webui/assets/index-raheqJeu.js differ
 
lightrag/api/webui/index.html CHANGED
Binary files a/lightrag/api/webui/index.html and b/lightrag/api/webui/index.html differ
 
lightrag/base.py CHANGED
@@ -16,7 +16,10 @@ import numpy as np
16
  from .utils import EmbeddingFunc
17
  from .types import KnowledgeGraph
18
 
19
- load_dotenv()
 
 
 
20
 
21
 
22
  class TextChunkSchema(TypedDict):
 
16
  from .utils import EmbeddingFunc
17
  from .types import KnowledgeGraph
18
 
19
+ # use the .env that is inside the current folder
20
+ # allows to use different .env file for each lightrag instance
21
+ # the OS environment variables take precedence over the .env file
22
+ load_dotenv(dotenv_path=".env", override=False)
23
 
24
 
25
  class TextChunkSchema(TypedDict):
lightrag/lightrag.py CHANGED
@@ -55,8 +55,10 @@ from .utils import (
55
  from .types import KnowledgeGraph
56
  from dotenv import load_dotenv
57
 
58
- # Load environment variables
59
- load_dotenv(override=True)
 
 
60
 
61
  # TODO: TO REMOVE @Yannick
62
  config = configparser.ConfigParser()
 
55
  from .types import KnowledgeGraph
56
  from dotenv import load_dotenv
57
 
58
+ # use the .env that is inside the current folder
59
+ # allows to use different .env file for each lightrag instance
60
+ # the OS environment variables take precedence over the .env file
61
+ load_dotenv(dotenv_path=".env", override=False)
62
 
63
  # TODO: TO REMOVE @Yannick
64
  config = configparser.ConfigParser()
lightrag/llm/openai.py CHANGED
@@ -58,6 +58,7 @@ async def openai_complete_if_cache(
58
  history_messages: list[dict[str, Any]] | None = None,
59
  base_url: str | None = None,
60
  api_key: str | None = None,
 
61
  **kwargs: Any,
62
  ) -> str:
63
  if history_messages is None:
@@ -89,11 +90,13 @@ async def openai_complete_if_cache(
89
  messages.extend(history_messages)
90
  messages.append({"role": "user", "content": prompt})
91
 
92
- logger.debug("===== Sending Query to LLM =====")
93
  logger.debug(f"Model: {model} Base URL: {base_url}")
94
  logger.debug(f"Additional kwargs: {kwargs}")
95
- verbose_debug(f"Query: {prompt}")
96
  verbose_debug(f"System prompt: {system_prompt}")
 
 
97
 
98
  try:
99
  if "response_format" in kwargs:
@@ -154,6 +157,18 @@ async def openai_complete_if_cache(
154
 
155
  if r"\u" in content:
156
  content = safe_unicode_decode(content.encode("utf-8"))
 
 
 
 
 
 
 
 
 
 
 
 
157
  return content
158
 
159
 
 
58
  history_messages: list[dict[str, Any]] | None = None,
59
  base_url: str | None = None,
60
  api_key: str | None = None,
61
+ token_tracker: Any | None = None,
62
  **kwargs: Any,
63
  ) -> str:
64
  if history_messages is None:
 
90
  messages.extend(history_messages)
91
  messages.append({"role": "user", "content": prompt})
92
 
93
+ logger.debug("===== Entering func of LLM =====")
94
  logger.debug(f"Model: {model} Base URL: {base_url}")
95
  logger.debug(f"Additional kwargs: {kwargs}")
96
+ logger.debug(f"Num of history messages: {len(history_messages)}")
97
  verbose_debug(f"System prompt: {system_prompt}")
98
+ verbose_debug(f"Query: {prompt}")
99
+ logger.debug("===== Sending Query to LLM =====")
100
 
101
  try:
102
  if "response_format" in kwargs:
 
157
 
158
  if r"\u" in content:
159
  content = safe_unicode_decode(content.encode("utf-8"))
160
+
161
+ if token_tracker and hasattr(response, "usage"):
162
+ token_counts = {
163
+ "prompt_tokens": getattr(response.usage, "prompt_tokens", 0),
164
+ "completion_tokens": getattr(response.usage, "completion_tokens", 0),
165
+ "total_tokens": getattr(response.usage, "total_tokens", 0),
166
+ }
167
+ token_tracker.add_usage(token_counts)
168
+
169
+ logger.debug(f"Response content len: {len(content)}")
170
+ verbose_debug(f"Response: {response}")
171
+
172
  return content
173
 
174
 
lightrag/operate.py CHANGED
@@ -38,8 +38,10 @@ from .prompt import GRAPH_FIELD_SEP, PROMPTS
 import time
 from dotenv import load_dotenv
 
-# Load environment variables
-load_dotenv(override=True)
+# use the .env that is inside the current folder
+# allows to use different .env file for each lightrag instance
+# the OS environment variables take precedence over the .env file
+load_dotenv(dotenv_path=".env", override=False)
 
 
 def chunking_by_token_size(
@@ -589,7 +591,7 @@ async def extract_entities(
         processed_chunks += 1
         entities_count = len(maybe_nodes)
         relations_count = len(maybe_edges)
-        log_message = f" Chunk {processed_chunks}/{total_chunks}: extracted {entities_count} entities and {relations_count} relationships (deduplicated)"
+        log_message = f" Chk {processed_chunks}/{total_chunks}: extracted {entities_count} Ent + {relations_count} Rel (deduplicated)"
        logger.info(log_message)
        if pipeline_status is not None:
            async with pipeline_status_lock:
@@ -654,7 +656,7 @@ async def extract_entities(
             pipeline_status["latest_message"] = log_message
             pipeline_status["history_messages"].append(log_message)
 
-    log_message = f"Extracted {len(all_entities_data)} entities and {len(all_relationships_data)} relationships (deduplicated)"
+    log_message = f"Extracted {len(all_entities_data)} entities + {len(all_relationships_data)} relationships (deduplicated)"
     logger.info(log_message)
     if pipeline_status is not None:
         async with pipeline_status_lock:
@@ -1038,7 +1040,7 @@ async def mix_kg_vector_query(
         # Include time information in content
         formatted_chunks = []
         for c in maybe_trun_chunks:
-            chunk_text = c["content"]
+            chunk_text = "File path: " + c["file_path"] + "\n" + c["content"]
             if c["created_at"]:
                 chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
             formatted_chunks.append(chunk_text)
@@ -1334,9 +1336,9 @@ async def _get_node_data(
     )
     relations_context = list_of_list_to_csv(relations_section_list)
 
-    text_units_section_list = [["id", "content"]]
+    text_units_section_list = [["id", "content", "file_path"]]
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append([i, t["content"]])
+        text_units_section_list.append([i, t["content"], t["file_path"]])
     text_units_context = list_of_list_to_csv(text_units_section_list)
     return entities_context, relations_context, text_units_context
 
@@ -1597,9 +1599,9 @@ async def _get_edge_data(
     )
     entities_context = list_of_list_to_csv(entites_section_list)
 
-    text_units_section_list = [["id", "content"]]
+    text_units_section_list = [["id", "content", "file_path"]]
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append([i, t["content"]])
+        text_units_section_list.append([i, t["content"], t["file_path"]])
     text_units_context = list_of_list_to_csv(text_units_section_list)
     return entities_context, relations_context, text_units_context
 
@@ -1785,7 +1787,12 @@ async def naive_query(
         f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
     )
 
-    section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
+    section = "\n--New Chunk--\n".join(
+        [
+            "File path: " + c["file_path"] + "\n" + c["content"]
+            for c in maybe_trun_chunks
+        ]
+    )
 
     if query_param.only_need_context:
         return section
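
The net effect of these `operate.py` changes is that every retrieved chunk now carries its source file in the context handed to the LLM. A small sketch of the string `naive_query` now builds, using made-up chunk dicts with the same `file_path`/`content` keys:

```python
maybe_trun_chunks = [
    {"file_path": "docs/intro.md", "content": "LightRAG combines KG and vector retrieval."},
    {"file_path": "docs/usage.md", "content": "Call rag.query() with a QueryParam."},
]

# Mirrors the join added in naive_query above.
section = "\n--New Chunk--\n".join(
    ["File path: " + c["file_path"] + "\n" + c["content"] for c in maybe_trun_chunks]
)
print(section)
```
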
lightrag/prompt.py CHANGED
@@ -222,7 +222,7 @@ When handling relationships with timestamps:
 - Use markdown formatting with appropriate section headings
 - Please respond in the same language as the user's question.
 - Ensure the response maintains continuity with the conversation history.
-- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path)
+- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] file_path
 - If you don't know the answer, just say so.
 - Do not make anything up. Do not include information not provided by the Knowledge Base."""
 
@@ -320,7 +320,7 @@ When handling content with timestamps:
 - Use markdown formatting with appropriate section headings
 - Please respond in the same language as the user's question.
 - Ensure the response maintains continuity with the conversation history.
-- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path)
+- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] file_path
 - If you don't know the answer, just say so.
 - Do not include information not provided by the Document Chunks."""
 
@@ -382,6 +382,6 @@ When handling information with timestamps:
 - Ensure the response maintains continuity with the conversation history.
 - Organize answer in sections focusing on one main point or aspect of the answer
 - Use clear and descriptive section titles that reflect the content
-- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path)
+- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] file_path
 - If you don't know the answer, just say so. Do not make anything up.
 - Do not include information not provided by the Data Sources."""
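
All three response prompts now ask for bare file paths instead of quoted source content, so a generated References section is expected to look roughly like this (the paths are hypothetical):

```
### References
- [KG] docs/research/quantum_team.md
- [DC] papers/alice_2024.pdf
```
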
lightrag/utils.py CHANGED
@@ -19,9 +19,10 @@ import tiktoken
 from lightrag.prompt import PROMPTS
 from dotenv import load_dotenv
 
-# Load environment variables
-load_dotenv(override=True)
-
+# use the .env that is inside the current folder
+# allows to use different .env file for each lightrag instance
+# the OS environment variables take precedence over the .env file
+load_dotenv(dotenv_path=".env", override=False)
 
 VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
 
@@ -46,7 +47,7 @@ def verbose_debug(msg: str, *args, **kwargs):
         formatted_msg = msg
         # Then truncate the formatted message
         truncated_msg = (
-            formatted_msg[:50] + "..." if len(formatted_msg) > 50 else formatted_msg
+            formatted_msg[:100] + "..." if len(formatted_msg) > 100 else formatted_msg
         )
         logger.debug(truncated_msg, **kwargs)
 
@@ -953,3 +954,60 @@ def check_storage_env_vars(storage_name: str) -> None:
             f"Storage implementation '{storage_name}' requires the following "
             f"environment variables: {', '.join(missing_vars)}"
         )
+
+
+class TokenTracker:
+    """Track token usage for LLM calls."""
+
+    def __init__(self):
+        self.reset()
+
+    def __enter__(self):
+        self.reset()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        print(self)
+
+    def reset(self):
+        self.prompt_tokens = 0
+        self.completion_tokens = 0
+        self.total_tokens = 0
+        self.call_count = 0
+
+    def add_usage(self, token_counts):
+        """Add token usage from one LLM call.
+
+        Args:
+            token_counts: A dictionary containing prompt_tokens, completion_tokens, total_tokens
+        """
+        self.prompt_tokens += token_counts.get("prompt_tokens", 0)
+        self.completion_tokens += token_counts.get("completion_tokens", 0)
+
+        # If total_tokens is provided, use it directly; otherwise calculate the sum
+        if "total_tokens" in token_counts:
+            self.total_tokens += token_counts["total_tokens"]
+        else:
+            self.total_tokens += token_counts.get(
+                "prompt_tokens", 0
+            ) + token_counts.get("completion_tokens", 0)
+
+        self.call_count += 1
+
+    def get_usage(self):
+        """Get current usage statistics."""
+        return {
+            "prompt_tokens": self.prompt_tokens,
+            "completion_tokens": self.completion_tokens,
+            "total_tokens": self.total_tokens,
+            "call_count": self.call_count,
+        }
+
+    def __str__(self):
+        usage = self.get_usage()
+        return (
+            f"LLM call count: {usage['call_count']}, "
+            f"Prompt tokens: {usage['prompt_tokens']}, "
+            f"Completion tokens: {usage['completion_tokens']}, "
+            f"Total tokens: {usage['total_tokens']}"
+        )
lightrag_webui/src/api/lightrag.ts CHANGED
@@ -109,7 +109,7 @@ export type QueryResponse = {
 }
 
 export type DocActionResponse = {
-  status: 'success' | 'partial_success' | 'failure'
+  status: 'success' | 'partial_success' | 'failure' | 'duplicated'
   message: string
 }
 
lightrag_webui/src/components/documents/UploadDocumentsDialog.tsx CHANGED
@@ -1,4 +1,5 @@
 import { useState, useCallback } from 'react'
+import { FileRejection } from 'react-dropzone'
 import Button from '@/components/ui/Button'
 import {
   Dialog,
@@ -23,57 +24,132 @@ export default function UploadDocumentsDialog() {
   const [progresses, setProgresses] = useState<Record<string, number>>({})
   const [fileErrors, setFileErrors] = useState<Record<string, string>>({})
 
+  const handleRejectedFiles = useCallback(
+    (rejectedFiles: FileRejection[]) => {
+      // Process rejected files and add them to fileErrors
+      rejectedFiles.forEach(({ file, errors }) => {
+        // Get the first error message
+        let errorMsg = errors[0]?.message || t('documentPanel.uploadDocuments.fileUploader.fileRejected', { name: file.name })
+
+        // Simplify error message for unsupported file types
+        if (errorMsg.includes('file-invalid-type')) {
+          errorMsg = t('documentPanel.uploadDocuments.fileUploader.unsupportedType')
+        }
+
+        // Set progress to 100% to display error message
+        setProgresses((pre) => ({
+          ...pre,
+          [file.name]: 100
+        }))
+
+        // Add error message to fileErrors
+        setFileErrors(prev => ({
+          ...prev,
+          [file.name]: errorMsg
+        }))
+      })
+    },
+    [setProgresses, setFileErrors, t]
+  )
+
   const handleDocumentsUpload = useCallback(
     async (filesToUpload: File[]) => {
       setIsUploading(true)
-      setFileErrors({})
+
+      // Only clear errors for files that are being uploaded, keep errors for rejected files
+      setFileErrors(prev => {
+        const newErrors = { ...prev };
+        filesToUpload.forEach(file => {
+          delete newErrors[file.name];
+        });
+        return newErrors;
+      });
+
+      // Show uploading toast
+      const toastId = toast.loading(t('documentPanel.uploadDocuments.batch.uploading'))
 
       try {
-        toast.promise(
-          (async () => {
-            try {
-              await Promise.all(
-                filesToUpload.map(async (file) => {
-                  try {
-                    const result = await uploadDocument(file, (percentCompleted: number) => {
-                      console.debug(t('documentPanel.uploadDocuments.single.uploading', { name: file.name, percent: percentCompleted }))
-                      setProgresses((pre) => ({
-                        ...pre,
-                        [file.name]: percentCompleted
-                      }))
-                    })
-
-                    if (result.status !== 'success') {
-                      setFileErrors(prev => ({
-                        ...prev,
-                        [file.name]: result.message
-                      }))
-                    }
-                  } catch (err) {
-                    setFileErrors(prev => ({
-                      ...prev,
-                      [file.name]: errorMessage(err)
-                    }))
-                  }
-                })
-              )
-            } catch (error) {
-              console.error('Upload failed:', error)
-            }
-          })(),
-          {
-            loading: t('documentPanel.uploadDocuments.batch.uploading'),
-            success: t('documentPanel.uploadDocuments.batch.success'),
-            error: t('documentPanel.uploadDocuments.batch.error')
-          }
-        )
+        // Track errors locally to ensure we have the final state
+        const uploadErrors: Record<string, string> = {}
+
+        await Promise.all(
+          filesToUpload.map(async (file) => {
+            try {
+              // Initialize upload progress
+              setProgresses((pre) => ({
+                ...pre,
+                [file.name]: 0
+              }))
+
+              const result = await uploadDocument(file, (percentCompleted: number) => {
+                console.debug(t('documentPanel.uploadDocuments.single.uploading', { name: file.name, percent: percentCompleted }))
+                setProgresses((pre) => ({
+                  ...pre,
+                  [file.name]: percentCompleted
+                }))
+              })
+
+              if (result.status === 'duplicated') {
+                uploadErrors[file.name] = t('documentPanel.uploadDocuments.fileUploader.duplicateFile')
+                setFileErrors(prev => ({
+                  ...prev,
+                  [file.name]: t('documentPanel.uploadDocuments.fileUploader.duplicateFile')
+                }))
+              } else if (result.status !== 'success') {
+                uploadErrors[file.name] = result.message
+                setFileErrors(prev => ({
+                  ...prev,
+                  [file.name]: result.message
+                }))
+              }
+            } catch (err) {
+              console.error(`Upload failed for ${file.name}:`, err)
+
+              // Handle HTTP errors, including 400 errors
+              let errorMsg = errorMessage(err)
+
+              // If it's an axios error with response data, try to extract more detailed error info
+              if (err && typeof err === 'object' && 'response' in err) {
+                const axiosError = err as { response?: { status: number, data?: { detail?: string } } }
+                if (axiosError.response?.status === 400) {
+                  // Extract specific error message from backend response
+                  errorMsg = axiosError.response.data?.detail || errorMsg
+                }
+
+                // Set progress to 100% to display error message
+                setProgresses((pre) => ({
+                  ...pre,
+                  [file.name]: 100
+                }))
+              }
+
+              // Record error message in both local tracking and state
+              uploadErrors[file.name] = errorMsg
+              setFileErrors(prev => ({
+                ...prev,
+                [file.name]: errorMsg
+              }))
+            }
+          })
+        )
+
+        // Check if any files failed to upload using our local tracking
+        const hasErrors = Object.keys(uploadErrors).length > 0
+
+        // Update toast status
+        if (hasErrors) {
+          toast.error(t('documentPanel.uploadDocuments.batch.error'), { id: toastId })
+        } else {
+          toast.success(t('documentPanel.uploadDocuments.batch.success'), { id: toastId })
+        }
       } catch (err) {
-        toast.error(t('documentPanel.uploadDocuments.generalError', { error: errorMessage(err) }))
+        console.error('Unexpected error during upload:', err)
+        toast.error(t('documentPanel.uploadDocuments.generalError', { error: errorMessage(err) }), { id: toastId })
       } finally {
         setIsUploading(false)
       }
     },
-    [setIsUploading, setProgresses, t]
+    [setIsUploading, setProgresses, setFileErrors, t]
   )
 
   return (
@@ -107,6 +183,7 @@
           maxSize={200 * 1024 * 1024}
           description={t('documentPanel.uploadDocuments.fileTypes')}
           onUpload={handleDocumentsUpload}
+          onReject={handleRejectedFiles}
           progresses={progresses}
           fileErrors={fileErrors}
           disabled={isUploading}
lightrag_webui/src/components/ui/FileUploader.tsx CHANGED
@@ -39,6 +39,14 @@ interface FileUploaderProps extends React.HTMLAttributes<HTMLDivElement> {
    */
   onUpload?: (files: File[]) => Promise<void>
 
+  /**
+   * Function to be called when files are rejected.
+   * @type (rejections: FileRejection[]) => void
+   * @default undefined
+   * @example onReject={(rejections) => handleRejectedFiles(rejections)}
+   */
+  onReject?: (rejections: FileRejection[]) => void
+
   /**
    * Progress of the uploaded files.
    * @type Record<string, number> | undefined
@@ -125,6 +133,7 @@
     value: valueProp,
     onValueChange,
     onUpload,
+    onReject,
     progresses,
     fileErrors,
     accept = supportedFileTypes,
@@ -144,38 +153,77 @@
 
   const onDrop = React.useCallback(
     (acceptedFiles: File[], rejectedFiles: FileRejection[]) => {
-      if (!multiple && maxFileCount === 1 && acceptedFiles.length > 1) {
+      // Calculate total file count including both accepted and rejected files
+      const totalFileCount = (files?.length ?? 0) + acceptedFiles.length + rejectedFiles.length
+
+      // Check file count limits
+      if (!multiple && maxFileCount === 1 && (acceptedFiles.length + rejectedFiles.length) > 1) {
         toast.error(t('documentPanel.uploadDocuments.fileUploader.singleFileLimit'))
         return
       }
 
-      if ((files?.length ?? 0) + acceptedFiles.length > maxFileCount) {
+      if (totalFileCount > maxFileCount) {
         toast.error(t('documentPanel.uploadDocuments.fileUploader.maxFilesLimit', { count: maxFileCount }))
         return
       }
 
-      const newFiles = acceptedFiles.map((file) =>
+      // Handle rejected files first - this will set error states
+      if (rejectedFiles.length > 0) {
+        if (onReject) {
+          // Use the onReject callback if provided
+          onReject(rejectedFiles)
+        } else {
+          // Fall back to toast notifications if no callback is provided
+          rejectedFiles.forEach(({ file }) => {
+            toast.error(t('documentPanel.uploadDocuments.fileUploader.fileRejected', { name: file.name }))
+          })
+        }
+      }
+
+      // Process accepted files
+      const newAcceptedFiles = acceptedFiles.map((file) =>
         Object.assign(file, {
           preview: URL.createObjectURL(file)
         })
       )
 
-      const updatedFiles = files ? [...files, ...newFiles] : newFiles
+      // Process rejected files for UI display
+      const newRejectedFiles = rejectedFiles.map(({ file }) =>
+        Object.assign(file, {
+          preview: URL.createObjectURL(file),
+          rejected: true
+        })
+      )
+
+      // Combine all files for display
+      const allNewFiles = [...newAcceptedFiles, ...newRejectedFiles]
+      const updatedFiles = files ? [...files, ...allNewFiles] : allNewFiles
 
+      // Update the files state with all files
       setFiles(updatedFiles)
 
-      if (rejectedFiles.length > 0) {
-        rejectedFiles.forEach(({ file }) => {
-          toast.error(t('documentPanel.uploadDocuments.fileUploader.fileRejected', { name: file.name }))
-        })
-      }
-
-      if (onUpload && updatedFiles.length > 0 && updatedFiles.length <= maxFileCount) {
-        onUpload(updatedFiles)
+      // Only upload accepted files - make sure we're not uploading rejected files
+      if (onUpload && acceptedFiles.length > 0) {
+        // Filter out any files that might have been rejected by our custom validator
+        const validFiles = acceptedFiles.filter(file => {
+          // Check if file type is accepted
+          const fileExt = `.${file.name.split('.').pop()?.toLowerCase() || ''}`;
+          const isAccepted = Object.entries(accept || {}).some(([mimeType, extensions]) => {
+            return file.type === mimeType || extensions.includes(fileExt);
+          });
+
+          // Check file size
+          const isSizeValid = file.size <= maxSize;
+
+          return isAccepted && isSizeValid;
        });
+
+        if (validFiles.length > 0) {
+          onUpload(validFiles);
+        }
       }
     },
-
-    [files, maxFileCount, multiple, onUpload, setFiles, t]
+    [files, maxFileCount, multiple, onUpload, onReject, setFiles, t, accept, maxSize]
   )
 
   function onRemove(index: number) {
@@ -204,11 +252,39 @@
       <div className="relative flex flex-col gap-6 overflow-hidden">
         <Dropzone
           onDrop={onDrop}
-          accept={accept}
+          // remove accept, use customized validator
+          noClick={false}
+          noKeyboard={false}
           maxSize={maxSize}
           maxFiles={maxFileCount}
           multiple={maxFileCount > 1 || multiple}
           disabled={isDisabled}
+          validator={(file) => {
+            // Check if file type is accepted
+            const fileExt = `.${file.name.split('.').pop()?.toLowerCase() || ''}`;
+            const isAccepted = Object.entries(accept || {}).some(([mimeType, extensions]) => {
+              return file.type === mimeType || extensions.includes(fileExt);
+            });
+
+            if (!isAccepted) {
+              return {
+                code: 'file-invalid-type',
+                message: t('documentPanel.uploadDocuments.fileUploader.unsupportedType')
+              };
+            }
+
+            // Check file size
+            if (file.size > maxSize) {
+              return {
+                code: 'file-too-large',
+                message: t('documentPanel.uploadDocuments.fileUploader.fileTooLarge', {
+                  maxSize: formatBytes(maxSize)
+                })
+              };
+            }
+
+            return null;
+          }}
         >
           {({ getRootProps, getInputProps, isDragActive }) => (
             <div
@@ -279,18 +355,21 @@
 interface ProgressProps {
   value: number
   error?: boolean
+  showIcon?: boolean // New property to control icon display
 }
 
 function Progress({ value, error }: ProgressProps) {
   return (
-    <div className="h-2 w-full overflow-hidden rounded-full bg-secondary">
-      <div
-        className={cn(
-          'h-full transition-all',
-          error ? 'bg-destructive' : 'bg-primary'
-        )}
-        style={{ width: `${value}%` }}
-      />
+    <div className="relative h-2 w-full">
+      <div className="h-full w-full overflow-hidden rounded-full bg-secondary">
+        <div
+          className={cn(
+            'h-full transition-all',
+            error ? 'bg-red-400' : 'bg-primary'
+          )}
+          style={{ width: `${value}%` }}
+        />
+      </div>
     </div>
   )
 }
@@ -307,16 +386,22 @@ function FileCard({ file, progress, error, onRemove }: FileCardProps) {
   return (
     <div className="relative flex items-center gap-2.5">
       <div className="flex flex-1 gap-2.5">
-        {isFileWithPreview(file) ? <FilePreview file={file} /> : null}
+        {error ? (
+          <FileText className="text-red-400 size-10" aria-hidden="true" />
+        ) : (
+          isFileWithPreview(file) ? <FilePreview file={file} /> : null
+        )}
         <div className="flex w-full flex-col gap-2">
           <div className="flex flex-col gap-px">
             <p className="text-foreground/80 line-clamp-1 text-sm font-medium">{file.name}</p>
             <p className="text-muted-foreground text-xs">{formatBytes(file.size)}</p>
           </div>
           {error ? (
-            <div className="text-destructive text-sm">
-              <Progress value={100} error={true} />
-              <p className="mt-1">{error}</p>
+            <div className="text-red-400 text-sm">
+              <div className="relative mb-2">
+                <Progress value={100} error={true} />
+              </div>
+              <p>{error}</p>
             </div>
           ) : (
             progress ? <Progress value={progress} /> : null
lightrag_webui/src/features/DocumentManager.tsx CHANGED
@@ -21,7 +21,7 @@ import { errorMessage } from '@/lib/utils'
 import { toast } from 'sonner'
 import { useBackendState } from '@/stores/state'
 
-import { RefreshCwIcon, ActivityIcon } from 'lucide-react'
+import { RefreshCwIcon, ActivityIcon, ArrowUpIcon, ArrowDownIcon } from 'lucide-react'
 import { DocStatusResponse } from '@/api/lightrag'
 import PipelineStatusDialog from '@/components/documents/PipelineStatusDialog'
 
@@ -47,6 +47,49 @@ const getDisplayFileName = (doc: DocStatusResponse, maxLength: number = 20): str
 };
 
 const pulseStyle = `
+  /* Tooltip styles */
+  .tooltip-container {
+    position: relative;
+    overflow: visible !important;
+  }
+
+  .tooltip {
+    position: fixed; /* Use fixed positioning to escape overflow constraints */
+    z-index: 9999; /* Ensure tooltip appears above all other elements */
+    max-width: 600px;
+    white-space: normal;
+    border-radius: 0.375rem;
+    padding: 0.5rem 0.75rem;
+    background-color: rgba(0, 0, 0, 0.95);
+    color: white;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+    pointer-events: none; /* Prevent tooltip from interfering with mouse events */
+    opacity: 0;
+    visibility: hidden;
+    transition: opacity 0.15s, visibility 0.15s;
+  }
+
+  .tooltip.visible {
+    opacity: 1;
+    visibility: visible;
+  }
+
+  .dark .tooltip {
+    background-color: rgba(255, 255, 255, 0.95);
+    color: black;
+  }
+
+  /* Position tooltip helper class */
+  .tooltip-helper {
+    position: absolute;
+    visibility: hidden;
+    pointer-events: none;
+    top: 0;
+    left: 0;
+    width: 100%;
+    height: 0;
+  }
+
   @keyframes pulse {
     0% {
       background-color: rgb(255 0 0 / 0.1);
@@ -87,6 +130,10 @@
   }
 `;
 
+// Type definitions for sort field and direction
+type SortField = 'created_at' | 'updated_at' | 'id';
+type SortDirection = 'asc' | 'desc';
+
 export default function DocumentManager() {
   const [showPipelineStatus, setShowPipelineStatus] = useState(false)
   const { t } = useTranslation()
@@ -97,6 +144,52 @@ export default function DocumentManager() {
   const showFileName = useSettingsStore.use.showFileName()
   const setShowFileName = useSettingsStore.use.setShowFileName()
 
+  // Sort state
+  const [sortField, setSortField] = useState<SortField>('updated_at')
+  const [sortDirection, setSortDirection] = useState<SortDirection>('desc')
+
+  // Handle sort column click
+  const handleSort = (field: SortField) => {
+    if (sortField === field) {
+      // Toggle sort direction if clicking the same field
+      setSortDirection(prev => prev === 'asc' ? 'desc' : 'asc')
+    } else {
+      // Set new sort field with default desc direction
+      setSortField(field)
+      setSortDirection('desc')
+    }
+  }
+
+  // Sort documents based on current sort field and direction
+  const sortDocuments = (documents: DocStatusResponse[]) => {
+    return [...documents].sort((a, b) => {
+      let valueA, valueB;
+
+      // Special handling for ID field based on showFileName setting
+      if (sortField === 'id' && showFileName) {
+        valueA = getDisplayFileName(a);
+        valueB = getDisplayFileName(b);
+      } else if (sortField === 'id') {
+        valueA = a.id;
+        valueB = b.id;
+      } else {
+        // Date fields
+        valueA = new Date(a[sortField]).getTime();
+        valueB = new Date(b[sortField]).getTime();
+      }
+
+      // Apply sort direction
+      const sortMultiplier = sortDirection === 'asc' ? 1 : -1;
+
+      // Compare values
+      if (typeof valueA === 'string' && typeof valueB === 'string') {
+        return sortMultiplier * valueA.localeCompare(valueB);
+      } else {
+        return sortMultiplier * (valueA > valueB ? 1 : valueA < valueB ? -1 : 0);
+      }
+    });
+  }
+
   // Store previous status counts
   const prevStatusCounts = useRef({
     processed: 0,
@@ -115,6 +208,71 @@ export default function DocumentManager() {
     }
   }, [])
 
+  // Reference to the card content element
+  const cardContentRef = useRef<HTMLDivElement>(null);
+
+  // Add tooltip position adjustment for fixed positioning
+  useEffect(() => {
+    if (!docs) return;
+
+    // Function to position tooltips
+    const positionTooltips = () => {
+      // Get all tooltip containers
+      const containers = document.querySelectorAll<HTMLElement>('.tooltip-container');
+
+      containers.forEach(container => {
+        const tooltip = container.querySelector<HTMLElement>('.tooltip');
+        if (!tooltip) return;
+
+        // Skip tooltips that aren't visible
+        if (!tooltip.classList.contains('visible')) return;
+
+        // Get container position
+        const rect = container.getBoundingClientRect();
+
+        // Position tooltip above the container
+        tooltip.style.left = `${rect.left}px`;
+        tooltip.style.top = `${rect.top - 5}px`;
+        tooltip.style.transform = 'translateY(-100%)';
+      });
+    };
+
+    // Set up event listeners
+    const handleMouseOver = (e: MouseEvent) => {
+      // Check if target or its parent is a tooltip container
+      const target = e.target as HTMLElement;
+      const container = target.closest('.tooltip-container');
+      if (!container) return;
+
+      // Find tooltip and make it visible
+      const tooltip = container.querySelector<HTMLElement>('.tooltip');
+      if (tooltip) {
+        tooltip.classList.add('visible');
+        // Position immediately without delay
+        positionTooltips();
+      }
+    };
+
+    const handleMouseOut = (e: MouseEvent) => {
+      const target = e.target as HTMLElement;
+      const container = target.closest('.tooltip-container');
+      if (!container) return;
+
+      const tooltip = container.querySelector<HTMLElement>('.tooltip');
+      if (tooltip) {
+        tooltip.classList.remove('visible');
+      }
+    };
+
+    document.addEventListener('mouseover', handleMouseOver);
+    document.addEventListener('mouseout', handleMouseOut);
+
+    return () => {
+      document.removeEventListener('mouseover', handleMouseOver);
+      document.removeEventListener('mouseout', handleMouseOut);
+    };
+  }, [docs]);
+
   const fetchDocuments = useCallback(async () => {
     try {
       const docs = await getDocuments()
@@ -192,13 +350,18 @@ export default function DocumentManager() {
     return () => clearInterval(interval)
   }, [health, fetchDocuments, t, currentTab])
 
+  // Add dependency on sort state to re-render when sort changes
+  useEffect(() => {
+    // This effect ensures the component re-renders when sort state changes
+  }, [sortField, sortDirection]);
+
   return (
-    <Card className="!size-full !rounded-none !border-none">
-      <CardHeader>
+    <Card className="!rounded-none !overflow-hidden flex flex-col h-full min-h-0">
+      <CardHeader className="py-2 px-6">
         <CardTitle className="text-lg">{t('documentPanel.documentManager.title')}</CardTitle>
       </CardHeader>
-      <CardContent className="space-y-4">
-        <div className="flex gap-2">
+      <CardContent className="flex-1 flex flex-col min-h-0 overflow-auto">
+        <div className="flex gap-2 mb-2">
         <div className="flex gap-2">
           <Button
             variant="outline"
@@ -231,8 +394,8 @@ export default function DocumentManager() {
           />
         </div>
 
-        <Card>
-          <CardHeader>
+        <Card className="flex-1 flex flex-col border rounded-md min-h-0 mb-2">
+          <CardHeader className="flex-none py-2 px-4">
             <div className="flex justify-between items-center">
               <CardTitle>{t('documentPanel.documentManager.uploadedTitle')}</CardTitle>
               <div className="flex items-center gap-2">
@@ -250,95 +413,140 @@ export default function DocumentManager() {
               </Button>
             </div>
           </div>
-          <CardDescription>{t('documentPanel.documentManager.uploadedDescription')}</CardDescription>
+          <CardDescription aria-hidden="true" className="hidden">{t('documentPanel.documentManager.uploadedDescription')}</CardDescription>
         </CardHeader>
 
-        <CardContent>
+        <CardContent className="flex-1 relative p-0" ref={cardContentRef}>
           {!docs && (
-            <EmptyCard
-              title={t('documentPanel.documentManager.emptyTitle')}
-              description={t('documentPanel.documentManager.emptyDescription')}
-            />
+            <div className="absolute inset-0 p-0">
+              <EmptyCard
+                title={t('documentPanel.documentManager.emptyTitle')}
+                description={t('documentPanel.documentManager.emptyDescription')}
+              />
+            </div>
           )}
           {docs && (
-            <Table>
-              <TableHeader>
-                <TableRow>
-                  <TableHead>{t('documentPanel.documentManager.columns.id')}</TableHead>
-                  <TableHead>{t('documentPanel.documentManager.columns.summary')}</TableHead>
-                  <TableHead>{t('documentPanel.documentManager.columns.status')}</TableHead>
-                  <TableHead>{t('documentPanel.documentManager.columns.length')}</TableHead>
-                  <TableHead>{t('documentPanel.documentManager.columns.chunks')}</TableHead>
-                  <TableHead>{t('documentPanel.documentManager.columns.created')}</TableHead>
-                  <TableHead>{t('documentPanel.documentManager.columns.updated')}</TableHead>
-                </TableRow>
-              </TableHeader>
-              <TableBody className="text-sm">
-                {Object.entries(docs.statuses).map(([status, documents]) =>
-                  documents.map((doc) => (
-                    <TableRow key={doc.id}>
-                      <TableCell className="truncate font-mono overflow-visible">
-                        {showFileName ? (
-                          <>
-                            <div className="group relative overflow-visible">
-                              <div className="truncate">
-                                {getDisplayFileName(doc, 35)}
-                              </div>
-                              <div className="invisible group-hover:visible absolute z-[9999] mt-1 max-w-[800px] whitespace-normal break-all rounded-md bg-black/95 px-3 py-2 text-sm text-white shadow-lg dark:bg-white/95 dark:text-black">
-                                {doc.file_path}
-                              </div>
-                            </div>
-                            <div className="text-xs text-gray-500">{doc.id}</div>
-                          </>
-                        ) : (
-                          <div className="group relative overflow-visible">
-                            <div className="truncate">
-                              {doc.id}
-                            </div>
-                            <div className="invisible group-hover:visible absolute z-[9999] mt-1 max-w-[800px] whitespace-normal break-all rounded-md bg-black/95 px-3 py-2 text-sm text-white shadow-lg dark:bg-white/95 dark:text-black">
-                              {doc.file_path}
-                            </div>
-                          </div>
-                        )}
-                      </TableCell>
-                      <TableCell className="max-w-xs min-w-24 truncate overflow-visible">
-                        <div className="group relative overflow-visible">
-                          <div className="truncate">
-                            {doc.content_summary}
-                          </div>
-                          <div className="invisible group-hover:visible absolute z-[9999] mt-1 max-w-[800px] whitespace-normal break-all rounded-md bg-black/95 px-3 py-2 text-sm text-white shadow-lg dark:bg-white/95 dark:text-black">
-                            {doc.content_summary}
-                          </div>
-                        </div>
-                      </TableCell>
-                      <TableCell>
-                        {status === 'processed' && (
-                          <span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
-                        )}
-                        {status === 'processing' && (
-                          <span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
-                        )}
-                        {status === 'pending' && <span className="text-yellow-600">{t('documentPanel.documentManager.status.pending')}</span>}
-                        {status === 'failed' && <span className="text-red-600">{t('documentPanel.documentManager.status.failed')}</span>}
-                        {doc.error && (
-                          <span className="ml-2 text-red-500" title={doc.error}>
-                            ⚠️
-                          </span>
-                        )}
-                      </TableCell>
-                      <TableCell>{doc.content_length ?? '-'}</TableCell>
-                      <TableCell>{doc.chunks_count ?? '-'}</TableCell>
-                      <TableCell className="truncate">
-                        {new Date(doc.created_at).toLocaleString()}
-                      </TableCell>
-                      <TableCell className="truncate">
-                        {new Date(doc.updated_at).toLocaleString()}
-                      </TableCell>
-                    </TableRow>
-                  ))
-                )}
-              </TableBody>
-            </Table>
+            <div className="absolute inset-0 flex flex-col p-0">
+              <div className="absolute inset-[-1px] flex flex-col p-0 border rounded-md border-gray-200 dark:border-gray-700 overflow-hidden">
+                <Table className="w-full">
+                  <TableHeader className="sticky top-0 bg-background z-10 shadow-sm">
+                    <TableRow className="border-b bg-card/95 backdrop-blur supports-[backdrop-filter]:bg-card/75 shadow-[inset_0_-1px_0_rgba(0,0,0,0.1)]">
+                      <TableHead
+                        onClick={() => handleSort('id')}
+                        className="cursor-pointer hover:bg-gray-200 dark:hover:bg-gray-800 select-none"
+                      >
+                        <div className="flex items-center">
+                          {t('documentPanel.documentManager.columns.id')}
+                          {sortField === 'id' && (
+                            <span className="ml-1">
+                              {sortDirection === 'asc' ? <ArrowUpIcon size={14} /> : <ArrowDownIcon size={14} />}
+                            </span>
+                          )}
+                        </div>
+                      </TableHead>
+                      <TableHead>{t('documentPanel.documentManager.columns.summary')}</TableHead>
+                      <TableHead>{t('documentPanel.documentManager.columns.status')}</TableHead>
+                      <TableHead>{t('documentPanel.documentManager.columns.length')}</TableHead>
+                      <TableHead>{t('documentPanel.documentManager.columns.chunks')}</TableHead>
+                      <TableHead
+                        onClick={() => handleSort('created_at')}
+                        className="cursor-pointer hover:bg-gray-200 dark:hover:bg-gray-800 select-none"
+                      >
+                        <div className="flex items-center">
+                          {t('documentPanel.documentManager.columns.created')}
+                          {sortField === 'created_at' && (
+                            <span className="ml-1">
+                              {sortDirection === 'asc' ? <ArrowUpIcon size={14} /> : <ArrowDownIcon size={14} />}
+                            </span>
+                          )}
+                        </div>
+                      </TableHead>
+                      <TableHead
+                        onClick={() => handleSort('updated_at')}
+                        className="cursor-pointer hover:bg-gray-200 dark:hover:bg-gray-800 select-none"
+                      >
+                        <div className="flex items-center">
+                          {t('documentPanel.documentManager.columns.updated')}
+                          {sortField === 'updated_at' && (
+                            <span className="ml-1">
+                              {sortDirection === 'asc' ? <ArrowUpIcon size={14} /> : <ArrowDownIcon size={14} />}
+                            </span>
+                          )}
+                        </div>
+                      </TableHead>
+                    </TableRow>
+                  </TableHeader>
+                  <TableBody className="text-sm overflow-auto">
+                    {Object.entries(docs.statuses).flatMap(([status, documents]) => {
+                      // Apply sorting to documents
+                      const sortedDocuments = sortDocuments(documents);
+
+                      return sortedDocuments.map(doc => (
+                        <TableRow key={doc.id}>
+                          <TableCell className="truncate font-mono overflow-visible max-w-[250px]">
+                            {showFileName ? (
+                              <>
+                                <div className="group relative overflow-visible tooltip-container">
+                                  <div className="truncate">
+                                    {getDisplayFileName(doc, 30)}
+                                  </div>
+                                  <div className="invisible group-hover:visible tooltip">
+                                    {doc.file_path}
+                                  </div>
+                                </div>
+                                <div className="text-xs text-gray-500">{doc.id}</div>
+                              </>
+                            ) : (
+                              <div className="group relative overflow-visible tooltip-container">
+                                <div className="truncate">
+                                  {doc.id}
+                                </div>
+                                <div className="invisible group-hover:visible tooltip">
+                                  {doc.file_path}
+                                </div>
+                              </div>
+                            )}
+                          </TableCell>
+                          <TableCell className="max-w-xs min-w-45 truncate overflow-visible">
+                            <div className="group relative overflow-visible tooltip-container">
+                              <div className="truncate">
+                                {doc.content_summary}
+                              </div>
+                              <div className="invisible group-hover:visible tooltip">
+                                {doc.content_summary}
+                              </div>
+                            </div>
+                          </TableCell>
+                          <TableCell>
+                            {status === 'processed' && (
+                              <span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
+                            )}
+                            {status === 'processing' && (
+                              <span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
+                            )}
+                            {status === 'pending' && <span className="text-yellow-600">{t('documentPanel.documentManager.status.pending')}</span>}
+                            {status === 'failed' && <span className="text-red-600">{t('documentPanel.documentManager.status.failed')}</span>}
+                            {doc.error && (
+                              <span className="ml-2 text-red-500" title={doc.error}>
+                                ⚠️
+                              </span>
+                            )}
+                          </TableCell>
+                          <TableCell>{doc.content_length ?? '-'}</TableCell>
+                          <TableCell>{doc.chunks_count ?? '-'}</TableCell>
+                          <TableCell className="truncate">
+                            {new Date(doc.created_at).toLocaleString()}
+                          </TableCell>
+                          <TableCell className="truncate">
+                            {new Date(doc.updated_at).toLocaleString()}
+                          </TableCell>
+                        </TableRow>
+                      ));
+                    })}
+                  </TableBody>
+                </Table>
+              </div>
+            </div>
           )}
         </CardContent>
       </Card>
lightrag_webui/src/hooks/useLightragGraph.tsx CHANGED
@@ -205,7 +205,7 @@ const createSigmaGraph = (rawGraph: RawGraph | null) => {
   // Add edges from raw graph data
   for (const rawEdge of rawGraph?.edges ?? []) {
     rawEdge.dynamicId = graph.addDirectedEdge(rawEdge.source, rawEdge.target, {
-      label: rawEdge.type || undefined
+      label: rawEdge.properties?.keywords || undefined
     })
   }
 
@@ -660,7 +660,7 @@ const useLightrangeGraph = () => {
 
   // Add the edge to the sigma graph
   newEdge.dynamicId = sigmaGraph.addDirectedEdge(newEdge.source, newEdge.target, {
-    label: newEdge.type || undefined
+    label: newEdge.properties?.keywords || undefined
   });
 
   // Add the edge to the raw graph
lightrag_webui/src/locales/ar.json CHANGED
@@ -65,16 +65,19 @@
       "singleFileLimit": "لا يمكن رفع أكثر من ملف واحد في المرة الواحدة",
       "maxFilesLimit": "لا يمكن رفع أكثر من {{count}} ملفات",
       "fileRejected": "تم رفض الملف {{name}}",
+      "unsupportedType": "نوع الملف غير مدعوم",
+      "fileTooLarge": "حجم الملف كبير جدًا، الحد الأقصى {{maxSize}}",
       "dropHere": "أفلت الملفات هنا",
       "dragAndDrop": "اسحب وأفلت الملفات هنا، أو انقر للاختيار",
       "removeFile": "إزالة الملف",
-      "uploadDescription": "يمكنك رفع {{isMultiple ? 'عدة' : count}} ملفات (حتى {{maxSize}} لكل منها)"
+      "uploadDescription": "يمكنك رفع {{isMultiple ? 'عدة' : count}} ملفات (حتى {{maxSize}} لكل منها)",
+      "duplicateFile": "اسم الملف موجود بالفعل في ذاكرة التخزين المؤقت للخادم"
     }
   },
   "documentManager": {
     "title": "إدارة المستندات",
     "scanButton": "مسح ضوئي",
-    "scanTooltip": "مسح المستندات ضوئيًا",
+    "scanTooltip": "مسح المستندات ضوئيًا في مجلد الإدخال",
     "pipelineStatusButton": "حالة خط المعالجة",
     "pipelineStatusTooltip": "عرض حالة خط المعالجة",
     "uploadedTitle": "المستندات المرفوعة",
@@ -212,7 +215,8 @@
       "entity_id": "الاسم",
      "entity_type": "النوع",
      "source_id": "معرف المصدر",
-      "Neighbour": "الجار"
+      "Neighbour": "الجار",
+      "file_path": "المصدر"
     }
   },
   "edge": {
lightrag_webui/src/locales/en.json CHANGED
@@ -65,16 +65,19 @@
       "singleFileLimit": "Cannot upload more than 1 file at a time",
       "maxFilesLimit": "Cannot upload more than {{count}} files",
       "fileRejected": "File {{name}} was rejected",
+      "unsupportedType": "Unsupported file type",
+      "fileTooLarge": "File too large, maximum size is {{maxSize}}",
       "dropHere": "Drop the files here",
       "dragAndDrop": "Drag and drop files here, or click to select files",
       "removeFile": "Remove file",
-      "uploadDescription": "You can upload {{isMultiple ? 'multiple' : count}} files (up to {{maxSize}} each)"
+      "uploadDescription": "You can upload {{isMultiple ? 'multiple' : count}} files (up to {{maxSize}} each)",
+      "duplicateFile": "File name already exists in server cache"
     }
   },
   "documentManager": {
     "title": "Document Management",
     "scanButton": "Scan",
-    "scanTooltip": "Scan documents",
+    "scanTooltip": "Scan documents in input folder",
     "pipelineStatusButton": "Pipeline Status",
     "pipelineStatusTooltip": "View pipeline status",
     "uploadedTitle": "Uploaded Documents",
@@ -212,7 +215,8 @@
       "entity_id": "Name",
      "entity_type": "Type",
      "source_id": "SrcID",
-      "Neighbour": "Neigh"
+      "Neighbour": "Neigh",
+      "file_path": "Source"
     }
   },
   "edge": {
lightrag_webui/src/locales/fr.json CHANGED
@@ -65,16 +65,19 @@
       "singleFileLimit": "Impossible de télécharger plus d'un fichier à la fois",
       "maxFilesLimit": "Impossible de télécharger plus de {{count}} fichiers",
       "fileRejected": "Le fichier {{name}} a été rejeté",
+      "unsupportedType": "Type de fichier non pris en charge",
+      "fileTooLarge": "Fichier trop volumineux, taille maximale {{maxSize}}",
       "dropHere": "Déposez les fichiers ici",
       "dragAndDrop": "Glissez et déposez les fichiers ici, ou cliquez pour sélectionner",
       "removeFile": "Supprimer le fichier",
-      "uploadDescription": "Vous pouvez télécharger {{isMultiple ? 'plusieurs' : count}} fichiers (jusqu'à {{maxSize}} chacun)"
+      "uploadDescription": "Vous pouvez télécharger {{isMultiple ? 'plusieurs' : count}} fichiers (jusqu'à {{maxSize}} chacun)",
+      "duplicateFile": "Le nom du fichier existe déjà dans le cache du serveur"
     }
   },
   "documentManager": {
     "title": "Gestion des documents",
     "scanButton": "Scanner",
-    "scanTooltip": "Scanner les documents",
+    "scanTooltip": "Scanner les documents dans le dossier d'entrée",
     "pipelineStatusButton": "État du Pipeline",
     "pipelineStatusTooltip": "Voir l'état du pipeline",
     "uploadedTitle": "Documents téléchargés",
@@ -212,7 +215,8 @@
       "entity_id": "Nom",
      "entity_type": "Type",
      "source_id": "ID source",
-      "Neighbour": "Voisin"
+      "Neighbour": "Voisin",
+      "file_path": "Source"
     }
   },
   "edge": {
lightrag_webui/src/locales/zh.json CHANGED
@@ -65,16 +65,19 @@
       "singleFileLimit": "一次只能上传一个文件",
       "maxFilesLimit": "最多只能上传 {{count}} 个文件",
       "fileRejected": "文件 {{name}} 被拒绝",
+      "unsupportedType": "不支持的文件类型",
+      "fileTooLarge": "文件过大,最大允许 {{maxSize}}",
       "dropHere": "将文件拖放到此处",
       "dragAndDrop": "拖放文件到此处,或点击选择文件",
       "removeFile": "移除文件",
-      "uploadDescription": "您可以上传{{isMultiple ? '多个' : count}}个文件(每个文件最大{{maxSize}})"
+      "uploadDescription": "您可以上传{{isMultiple ? '多个' : count}}个文件(每个文件最大{{maxSize}})",
+      "duplicateFile": "文件名与服务器上的缓存重复"
     }
   },
   "documentManager": {
     "title": "文档管理",
     "scanButton": "扫描",
-    "scanTooltip": "扫描文档",
+    "scanTooltip": "扫描输入目录中的文档",
     "pipelineStatusButton": "流水线状态",
     "pipelineStatusTooltip": "查看流水线状态",
     "uploadedTitle": "已上传文档",
@@ -212,7 +215,8 @@
       "entity_id": "名称",
      "entity_type": "类型",
      "source_id": "信源ID",
-      "Neighbour": "邻接"
+      "Neighbour": "邻接",
+      "file_path": "信源"
     }
   },
   "edge": {