LarFii committed
Commit 275e33e · 1 Parent(s): 8b3b01c

fix linting

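All of the hunks below apply the same mechanical fixes flagged by the linter: two blank lines between top-level definitions, trailing whitespace removed, and long single-line calls or dict literals wrapped so every line fits the length limit. As a minimal before/after sketch of the wrapping rule (assuming a Black/Ruff-style formatter; the commit message does not name the tool):

    # Before: one call over the line-length limit
    print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))

    # After: the call is split across lines, keeping each under the limit
    print(
        rag.query(
            "What are the top themes in this story?", param=QueryParam(mode="naive")
        )
    )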
examples/lightrag_api_ollama_demo.py CHANGED
@@ -36,7 +36,10 @@ async def init():
         llm_model_name="gemma2:9b",
         llm_model_max_async=4,
         llm_model_max_token_size=8192,
-        llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 8192}},
+        llm_model_kwargs={
+            "host": "http://localhost:11434",
+            "options": {"num_ctx": 8192},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=768,
             max_token_size=8192,
@@ -64,6 +67,8 @@ async def lifespan(app: FastAPI):
 app = FastAPI(
     title="LightRAG API", description="API for RAG operations", lifespan=lifespan
 )
+
+
 # Data models
 class QueryRequest(BaseModel):
     query: str
examples/lightrag_api_openai_compatible_demo.py CHANGED
@@ -75,7 +75,7 @@ async def get_embedding_dim():
 # Initialize RAG instance
 async def init():
     embedding_dimension = await get_embedding_dim()
-
+
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
@@ -88,9 +88,10 @@ async def init():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag

+
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     global rag
examples/lightrag_bedrock_demo.py CHANGED
@@ -21,6 +21,7 @@ WORKING_DIR = "./dickens"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -33,9 +34,10 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag

+
 def main():
     rag = asyncio.run(initialize_rag())

@@ -47,5 +49,7 @@ def main():
         print(f"| {mode.capitalize()} |")
         print("+-" + "-" * len(mode) + "-+\n")
         print(
-            rag.query("What are the top themes in this story?", param=QueryParam(mode=mode))
+            rag.query(
+                "What are the top themes in this story?", param=QueryParam(mode=mode)
+            )
         )
examples/lightrag_gemini_demo.py CHANGED
@@ -12,6 +12,7 @@ from lightrag.kg.shared_storage import initialize_pipeline_status

 import asyncio
 import nest_asyncio
+
 # Apply nest_asyncio to solve event loop issues
 nest_asyncio.apply()

@@ -79,9 +80,10 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -98,5 +100,6 @@ def main():

     print(response)

+
 if __name__ == "__main__":
     main()
examples/lightrag_hf_demo.py CHANGED
@@ -16,6 +16,7 @@ WORKING_DIR = "./dickens"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -41,6 +42,7 @@ async def initialize_rag():

     return rag

+
 def main():
     rag = asyncio.run(initialize_rag())

@@ -49,23 +51,32 @@ def main():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
examples/lightrag_llamaindex_direct_demo.py CHANGED
@@ -83,7 +83,7 @@ async def get_embedding_dim():

 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
-
+
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
@@ -96,7 +96,7 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag


@@ -111,23 +111,32 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
examples/lightrag_llamaindex_litellm_demo.py CHANGED
@@ -86,7 +86,7 @@ async def get_embedding_dim():

 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
-
+
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
@@ -99,7 +99,7 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag


@@ -114,23 +114,32 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
examples/lightrag_lmdeploy_demo.py CHANGED
@@ -41,6 +41,7 @@ async def lmdeploy_model_complete(
         **kwargs,
     )

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -63,9 +64,10 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -77,23 +79,32 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
-    main()
+    main()
examples/lightrag_nvidia_demo.py CHANGED
@@ -97,6 +97,7 @@ async def test_funcs():

 # asyncio.run(test_funcs())

+
 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
     print(f"Detected embedding dimension: {embedding_dimension}")
@@ -117,8 +118,10 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
+
+
 async def main():
     try:
         # Initialize RAG instance
examples/lightrag_ollama_age_demo.py CHANGED
@@ -27,6 +27,7 @@ os.environ["AGE_POSTGRES_HOST"] = "localhost"
 os.environ["AGE_POSTGRES_PORT"] = "5455"
 os.environ["AGE_GRAPH_NAME"] = "dickens"

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -34,7 +35,10 @@ async def initialize_rag():
         llm_model_name="llama3.1:8b",
         llm_model_max_async=4,
         llm_model_max_token_size=32768,
-        llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
+        llm_model_kwargs={
+            "host": "http://localhost:11434",
+            "options": {"num_ctx": 32768},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=768,
             max_token_size=8192,
@@ -47,13 +51,15 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag

+
 async def print_stream(stream):
     async for chunk in stream:
         print(chunk, end="", flush=True)

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -65,22 +71,30 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

     # stream response
@@ -94,5 +108,6 @@ def main():
     else:
         print(resp)

+
 if __name__ == "__main__":
     main()
examples/lightrag_ollama_demo.py CHANGED
@@ -17,6 +17,7 @@ logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -24,7 +25,10 @@ async def initialize_rag():
         llm_model_name="gemma2:2b",
         llm_model_max_async=4,
         llm_model_max_token_size=32768,
-        llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
+        llm_model_kwargs={
+            "host": "http://localhost:11434",
+            "options": {"num_ctx": 32768},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=768,
             max_token_size=8192,
@@ -36,13 +40,15 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag

+
 async def print_stream(stream):
     async for chunk in stream:
         print(chunk, end="", flush=True)

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -54,22 +60,30 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

     # stream response
@@ -83,5 +97,6 @@ def main():
     else:
         print(resp)

+
 if __name__ == "__main__":
     main()
examples/lightrag_ollama_gremlin_demo.py CHANGED
@@ -32,6 +32,7 @@ os.environ["GREMLIN_TRAVERSE_SOURCE"] = "g"
 os.environ["GREMLIN_USER"] = ""
 os.environ["GREMLIN_PASSWORD"] = ""

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -39,7 +40,10 @@ async def initialize_rag():
         llm_model_name="llama3.1:8b",
         llm_model_max_async=4,
         llm_model_max_token_size=32768,
-        llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
+        llm_model_kwargs={
+            "host": "http://localhost:11434",
+            "options": {"num_ctx": 32768},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=768,
             max_token_size=8192,
@@ -52,13 +56,15 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag

+
 async def print_stream(stream):
     async for chunk in stream:
         print(chunk, end="", flush=True)

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -70,22 +76,30 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

     # stream response
@@ -99,5 +113,6 @@ def main():
     else:
         print(resp)

+
 if __name__ == "__main__":
     main()
examples/lightrag_ollama_neo4j_milvus_mongo_demo.py CHANGED
@@ -32,6 +32,7 @@ os.environ["MILVUS_USER"] = "root"
 os.environ["MILVUS_PASSWORD"] = "root"
 os.environ["MILVUS_DB_NAME"] = "lightrag"

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -39,7 +40,10 @@ async def initialize_rag():
         llm_model_name="qwen2.5:14b",
         llm_model_max_async=4,
         llm_model_max_token_size=32768,
-        llm_model_kwargs={"host": "http://127.0.0.1:11434", "options": {"num_ctx": 32768}},
+        llm_model_kwargs={
+            "host": "http://127.0.0.1:11434",
+            "options": {"num_ctx": 32768},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=1024,
             max_token_size=8192,
@@ -54,9 +58,10 @@ async def initialize_rag():

     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -68,23 +73,32 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
examples/lightrag_openai_compatible_demo.py CHANGED
@@ -53,6 +53,7 @@ async def test_funcs():

 # asyncio.run(test_funcs())

+
 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
     print(f"Detected embedding dimension: {embedding_dimension}")
@@ -71,6 +72,8 @@ async def initialize_rag():
     await initialize_pipeline_status()

     return rag
+
+
 async def main():
     try:
         # Initialize RAG instance
examples/lightrag_openai_compatible_demo_embedding_cache.py CHANGED
@@ -53,6 +53,7 @@ async def test_funcs():

 # asyncio.run(test_funcs())

+
 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
     print(f"Detected embedding dimension: {embedding_dimension}")
@@ -76,6 +77,7 @@ async def initialize_rag():

     return rag

+
 async def main():
     try:
         # Initialize RAG instance
examples/lightrag_openai_compatible_stream_demo.py CHANGED
@@ -15,6 +15,8 @@ if not os.path.exists(WORKING_DIR):
 print(f"WorkingDir: {WORKING_DIR}")

 api_key = "empty"
+
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -40,11 +42,13 @@ async def initialize_rag():

     return rag

+
 async def print_stream(stream):
     async for chunk in stream:
         if chunk:
             print(chunk, end="", flush=True)

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -63,6 +67,6 @@ def main():
     else:
         print(resp)

+
 if __name__ == "__main__":
     main()
-
examples/lightrag_openai_demo.py CHANGED
@@ -9,6 +9,7 @@ WORKING_DIR = "./dickens"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -22,6 +23,7 @@ async def initialize_rag():

     return rag

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -31,24 +33,32 @@ def main():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
-
examples/lightrag_openai_mongodb_graph_demo.py CHANGED
@@ -76,23 +76,32 @@ def main():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
-    main()
+    main()
examples/lightrag_openai_neo4j_milvus_redis_demo.py CHANGED
@@ -50,6 +50,8 @@ embedding_func = EmbeddingFunc(
         texts, embed_model="shaw/dmeta-embedding-zh", host="http://117.50.173.35:11434"
     ),
 )
+
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -79,23 +81,32 @@ def main():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
examples/lightrag_oracle_demo.py CHANGED
@@ -64,6 +64,7 @@ async def get_embedding_dim():
     embedding_dim = embedding.shape[1]
     return embedding_dim

+
 async def initialize_rag():
     # Detect embedding dimension
     embedding_dimension = await get_embedding_dim()
@@ -102,6 +103,7 @@ async def initialize_rag():

     return rag

+
 async def main():
     try:
         # Initialize RAG instance
examples/lightrag_siliconcloud_demo.py CHANGED
@@ -47,6 +47,7 @@ async def test_funcs():

 asyncio.run(test_funcs())

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -71,24 +72,32 @@ def main():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
-
examples/lightrag_tidb_demo.py CHANGED
@@ -55,6 +55,7 @@ async def get_embedding_dim():
     embedding_dim = embedding.shape[1]
     return embedding_dim

+
 async def initialize_rag():
     # Detect embedding dimension
     embedding_dimension = await get_embedding_dim()
@@ -82,6 +83,7 @@ async def initialize_rag():

     return rag

+
 async def main():
     try:
         # Initialize RAG instance
examples/lightrag_zhipu_demo.py CHANGED
@@ -19,6 +19,7 @@ api_key = os.environ.get("ZHIPUAI_API_KEY")
 if api_key is None:
     raise Exception("Please set ZHIPU_API_KEY in your environment")

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -38,6 +39,7 @@ async def initialize_rag():

     return rag

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -47,23 +49,32 @@ def main():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
examples/lightrag_zhipu_postgres_demo.py CHANGED
@@ -28,6 +28,7 @@ os.environ["POSTGRES_USER"] = "rag"
 os.environ["POSTGRES_PASSWORD"] = "rag"
 os.environ["POSTGRES_DATABASE"] = "rag"

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -55,8 +56,9 @@ async def initialize_rag():

     return rag

+
 async def main():
-    # Initialize RAG instance
+    # Initialize RAG instance
     rag = asyncio.run(initialize_rag())

     # add embedding_func for graph database, it's deleted in commit 5661d76860436f7bf5aef2e50d9ee4a59660146c
examples/query_keyword_separation_example.py CHANGED
@@ -80,6 +80,8 @@ async def test_funcs():
 asyncio.run(test_funcs())

 embedding_dimension = 3072
+
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -101,7 +103,7 @@ async def initialize_rag():
 async def run_example():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
-
+
     book1 = open("./book_1.txt", encoding="utf-8")
     book2 = open("./book_2.txt", encoding="utf-8")

examples/test.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import asyncio
 from lightrag import LightRAG, QueryParam
 from lightrag.llm.openai import gpt_4o_mini_complete
 from lightrag.kg.shared_storage import initialize_pipeline_status
@@ -13,6 +14,7 @@ WORKING_DIR = "./dickens"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -35,23 +37,32 @@ def main():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
-    main()
+    main()
examples/test_chromadb.py CHANGED
@@ -112,12 +112,13 @@ async def initialize_rag():
         },
     )

-
     await rag.initialize_storages()
     await initialize_pipeline_status()

     return rag

+
+def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())

@@ -126,23 +127,32 @@ async def initialize_rag():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
examples/test_faiss.py CHANGED
@@ -58,6 +58,7 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
     embeddings = model.encode(texts, convert_to_numpy=True)
     return embeddings

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -78,8 +79,8 @@ async def initialize_rag():

     return rag

+
 def main():
-
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
     # Insert the custom chunks into LightRAG
examples/test_neo4j.py CHANGED
@@ -15,6 +15,7 @@ WORKING_DIR = "./local_neo4jWorkDir"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)

+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -29,6 +30,7 @@ async def initialize_rag():

     return rag

+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -38,23 +40,32 @@ def main():

     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )

     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )

     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )

     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )

+
 if __name__ == "__main__":
     main()
examples/test_split_by_character.ipynb DELETED
@@ -1,1313 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "4b5690db12e34685",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:40:58.307102Z",
-     "start_time": "2025-01-09T03:40:51.935233Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import logging\n",
-    "import numpy as np\n",
-    "from lightrag import LightRAG, QueryParam\n",
-    "from lightrag.llm.openai import openai_complete_if_cache, openai_embed\n",
-    "from lightrag.utils import EmbeddingFunc\n",
-    "from lightrag.kg.shared_storage import initialize_pipeline_status\n",
-    "import nest_asyncio"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "dd17956ec322b361",
-   "metadata": {},
-   "source": [
-    "#### split by character"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "8c8ee7c061bf9159",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:13.961167Z",
-     "start_time": "2025-01-09T03:41:13.958357Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "nest_asyncio.apply()\n",
-    "WORKING_DIR = \"../../llm_rag/paper_db/R000088_test1\"\n",
-    "logging.basicConfig(format=\"%(levelname)s:%(message)s\", level=logging.INFO)\n",
-    "if not os.path.exists(WORKING_DIR):\n",
-    "    os.mkdir(WORKING_DIR)\n",
-    "API = os.environ.get(\"DOUBAO_API_KEY\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "a5009d16e0851dca",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:16.862036Z",
-     "start_time": "2025-01-09T03:41:16.859306Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "async def llm_model_func(\n",
-    "    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs\n",
-    ") -> str:\n",
-    "    return await openai_complete_if_cache(\n",
-    "        \"ep-20241218114828-2tlww\",\n",
-    "        prompt,\n",
-    "        system_prompt=system_prompt,\n",
-    "        history_messages=history_messages,\n",
-    "        api_key=API,\n",
-    "        base_url=\"https://ark.cn-beijing.volces.com/api/v3\",\n",
-    "        **kwargs,\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "async def embedding_func(texts: list[str]) -> np.ndarray:\n",
-    "    return await openai_embed(\n",
-    "        texts,\n",
-    "        model=\"ep-20241231173413-pgjmk\",\n",
-    "        api_key=API,\n",
-    "        base_url=\"https://ark.cn-beijing.volces.com/api/v3\",\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "397fcad24ce4d0ed",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:24.950307Z",
-     "start_time": "2025-01-09T03:41:24.940353Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:lightrag:Logger initialized for working directory: ../../llm_rag/paper_db/R000088_test1\n",
-      "INFO:lightrag:Load KV llm_response_cache with 0 data\n",
-      "INFO:lightrag:Load KV full_docs with 0 data\n",
-      "INFO:lightrag:Load KV text_chunks with 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test1/vdb_entities.json'} 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test1/vdb_relationships.json'} 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test1/vdb_chunks.json'} 0 data\n",
-      "INFO:lightrag:Loaded document status storage with 0 records\n"
-     ]
-    }
-   ],
-   "source": [
-    "import asyncio\n",
-    "import nest_asyncio\n",
-    "\n",
-    "nest_asyncio.apply()\n",
-    "\n",
-    "async def initialize_rag():\n",
-    "    rag = LightRAG(\n",
-    "        working_dir=WORKING_DIR,\n",
-    "        llm_model_func=llm_model_func,\n",
-    "        embedding_func=EmbeddingFunc(\n",
-    "            embedding_dim=4096, max_token_size=8192, func=embedding_func\n",
-    "        ),\n",
-    "        chunk_token_size=512,\n",
-    "    )\n",
-    "    await rag.initialize_storages()\n",
-    "    await initialize_pipeline_status()\n",
-    "\n",
-    "    return rag\n",
-    "\n",
-    "rag = asyncio.run(initialize_rag())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "1dc3603677f7484d",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:37.947456Z",
-     "start_time": "2025-01-09T03:41:37.941901Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "with open(\n",
-    "    \"../../llm_rag/example/R000088/auto/R000088_full_txt.md\", \"r\", encoding=\"utf-8\"\n",
-    ") as f:\n",
-    "    content = f.read()\n",
-    "\n",
-    "\n",
-    "async def embedding_func(texts: list[str]) -> np.ndarray:\n",
-    "    return await openai_embed(\n",
-    "        texts,\n",
-    "        model=\"ep-20241231173413-pgjmk\",\n",
-    "        api_key=API,\n",
-    "        base_url=\"https://ark.cn-beijing.volces.com/api/v3\",\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "async def get_embedding_dim():\n",
-    "    test_text = [\"This is a test sentence.\"]\n",
-    "    embedding = await embedding_func(test_text)\n",
-    "    embedding_dim = embedding.shape[1]\n",
-    "    return embedding_dim"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "6844202606acfbe5",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:39.608541Z",
-     "start_time": "2025-01-09T03:41:39.165057Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n"
-     ]
-    }
-   ],
-   "source": [
-    "embedding_dimension = await get_embedding_dim()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "d6273839d9681403",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:44:34.295345Z",
-     "start_time": "2025-01-09T03:41:48.324171Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:lightrag:Processing 1 new unique documents\n",
-      "Processing batch 1: 0%| | 0/1 [00:00<?, ?it/s]INFO:lightrag:Inserting 35 vectors to chunks\n",
-      "\n",
-      "Generating embeddings: 0%| | 0/2 [00:00<?, ?batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
-      "\n",
-      "Generating embeddings: 50%|█████ | 1/2 [00:00<00:00, 1.36batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
-      "\n",
-      "Generating embeddings: 100%|██████████| 2/2 [00:04<00:00, 2.25s/batch]\u001b[A\n",
-      "\n",
-      "Extracting entities from chunks: 0%| | 0/35 [00:00<?, ?chunk/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "⠙ Processed 1 chunks, 1 entities(duplicated), 0 relations(duplicated)\r"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Extracting entities from chunks: 3%|▎ | 1/35 [00:04<02:47, 4.93s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "⠹ Processed 2 chunks, 2 entities(duplicated), 0 relations(duplicated)\r"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Extracting entities from chunks: 6%|▌ | 2/35 [00:05<01:18, 2.37s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "⠸ Processed 3 chunks, 9 entities(duplicated), 5 relations(duplicated)\r"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Extracting entities from chunks: 9%|▊ | 3/35 [00:26<05:43, 10.73s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "⠼ Processed 4 chunks, 16 entities(duplicated), 11 relations(duplicated)\r"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Extracting entities from chunks: 11%|█▏ | 4/35 [00:26<03:24, 6.60s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "⠴ Processed 5 chunks, 24 entities(duplicated), 18 relations(duplicated)\r"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Extracting entities from chunks: 14%|█▍ | 5/35 [00:33<03:24, 6.82s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "⠦ Processed 6 chunks, 35 entities(duplicated), 28 relations(duplicated)\r"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Extracting entities from chunks: 17%|█▋ | 6/35 [00:42<03:38, 7.53s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
- "\n",
321
- "Extracting entities from chunks: 17%|█▋ | 6/35 [00:42<03:38, 7.53s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
322
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
323
- ]
324
- },
325
- {
326
- "name": "stdout",
327
- "output_type": "stream",
328
- "text": [
329
- "⠧ Processed 7 chunks, 47 entities(duplicated), 36 relations(duplicated)\r"
330
- ]
331
- },
332
- {
333
- "name": "stderr",
334
- "output_type": "stream",
335
- "text": [
336
- "\n",
337
- "Extracting entities from chunks: 20%|██ | 7/35 [00:43<02:28, 5.31s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
338
- ]
339
- },
340
- {
341
- "name": "stdout",
342
- "output_type": "stream",
343
- "text": [
344
- "⠇ Processed 8 chunks, 61 entities(duplicated), 49 relations(duplicated)\r"
345
- ]
346
- },
347
- {
348
- "name": "stderr",
349
- "output_type": "stream",
350
- "text": [
351
- "\n",
352
- "Extracting entities from chunks: 23%|██▎ | 8/35 [00:45<01:52, 4.16s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
353
- ]
354
- },
355
- {
356
- "name": "stdout",
357
- "output_type": "stream",
358
- "text": [
359
- "⠏ Processed 9 chunks, 81 entities(duplicated), 49 relations(duplicated)\r"
360
- ]
361
- },
362
- {
363
- "name": "stderr",
364
- "output_type": "stream",
365
- "text": [
366
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
367
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
368
- ]
369
- },
370
- {
371
- "name": "stdout",
372
- "output_type": "stream",
373
- "text": [
374
- "⠋ Processed 10 chunks, 90 entities(duplicated), 62 relations(duplicated)\r"
375
- ]
376
- },
377
- {
378
- "name": "stderr",
379
- "output_type": "stream",
380
- "text": [
381
- "\n",
382
- "Extracting entities from chunks: 29%|██▊ | 10/35 [00:46<01:06, 2.64s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
383
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
384
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
385
- ]
386
- },
387
- {
388
- "name": "stdout",
389
- "output_type": "stream",
390
- "text": [
391
- "⠙ Processed 11 chunks, 101 entities(duplicated), 80 relations(duplicated)\r"
392
- ]
393
- },
394
- {
395
- "name": "stderr",
396
- "output_type": "stream",
397
- "text": [
398
- "\n",
399
- "Extracting entities from chunks: 31%|███▏ | 11/35 [00:52<01:19, 3.31s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
400
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
401
- ]
402
- },
403
- {
404
- "name": "stdout",
405
- "output_type": "stream",
406
- "text": [
407
- "⠹ Processed 12 chunks, 108 entities(duplicated), 85 relations(duplicated)\r"
408
- ]
409
- },
410
- {
411
- "name": "stderr",
412
- "output_type": "stream",
413
- "text": [
414
- "\n",
415
- "Extracting entities from chunks: 34%|███▍ | 12/35 [00:54<01:11, 3.12s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
416
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
417
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
418
- ]
419
- },
420
- {
421
- "name": "stdout",
422
- "output_type": "stream",
423
- "text": [
424
- "⠸ Processed 13 chunks, 120 entities(duplicated), 100 relations(duplicated)\r"
425
- ]
426
- },
427
- {
428
- "name": "stderr",
429
- "output_type": "stream",
430
- "text": [
431
- "\n",
432
- "Extracting entities from chunks: 37%|███▋ | 13/35 [00:59<01:18, 3.55s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
433
- ]
434
- },
435
- {
436
- "name": "stdout",
437
- "output_type": "stream",
438
- "text": [
439
- "⠼ Processed 14 chunks, 131 entities(duplicated), 110 relations(duplicated)\r"
440
- ]
441
- },
442
- {
443
- "name": "stderr",
444
- "output_type": "stream",
445
- "text": [
446
- "\n",
447
- "Extracting entities from chunks: 40%|████ | 14/35 [01:00<00:59, 2.82s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
448
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
449
- ]
450
- },
451
- {
452
- "name": "stdout",
453
- "output_type": "stream",
454
- "text": [
455
- "⠴ Processed 15 chunks, 143 entities(duplicated), 110 relations(duplicated)\r"
456
- ]
457
- },
458
- {
459
- "name": "stderr",
460
- "output_type": "stream",
461
- "text": [
462
- "\n",
463
- "Extracting entities from chunks: 43%|████▎ | 15/35 [01:02<00:52, 2.64s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
464
- ]
465
- },
466
- {
467
- "name": "stdout",
468
- "output_type": "stream",
469
- "text": [
470
- "⠦ Processed 16 chunks, 162 entities(duplicated), 124 relations(duplicated)\r"
471
- ]
472
- },
473
- {
474
- "name": "stderr",
475
- "output_type": "stream",
476
- "text": [
477
- "\n",
478
- "Extracting entities from chunks: 46%|████▌ | 16/35 [01:05<00:53, 2.80s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
479
- ]
480
- },
481
- {
482
- "name": "stdout",
483
- "output_type": "stream",
484
- "text": [
485
- "⠧ Processed 17 chunks, 174 entities(duplicated), 132 relations(duplicated)\r"
486
- ]
487
- },
488
- {
489
- "name": "stderr",
490
- "output_type": "stream",
491
- "text": [
492
- "\n",
493
- "Extracting entities from chunks: 49%|████▊ | 17/35 [01:06<00:39, 2.19s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
494
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
495
- ]
496
- },
497
- {
498
- "name": "stdout",
499
- "output_type": "stream",
500
- "text": [
501
- "⠇ Processed 18 chunks, 185 entities(duplicated), 137 relations(duplicated)\r"
502
- ]
503
- },
504
- {
505
- "name": "stderr",
506
- "output_type": "stream",
507
- "text": [
508
- "\n",
509
- "Extracting entities from chunks: 51%|█████▏ | 18/35 [01:12<00:53, 3.15s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
510
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
511
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
512
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
513
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
514
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
515
- ]
516
- },
517
- {
518
- "name": "stdout",
519
- "output_type": "stream",
520
- "text": [
521
- "⠏ Processed 19 chunks, 193 entities(duplicated), 149 relations(duplicated)\r"
522
- ]
523
- },
524
- {
525
- "name": "stderr",
526
- "output_type": "stream",
527
- "text": [
528
- "\n",
529
- "Extracting entities from chunks: 54%|█████▍ | 19/35 [01:18<01:06, 4.14s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
530
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
531
- ]
532
- },
533
- {
534
- "name": "stdout",
535
- "output_type": "stream",
536
- "text": [
537
- "⠋ Processed 20 chunks, 205 entities(duplicated), 158 relations(duplicated)\r"
538
- ]
539
- },
540
- {
541
- "name": "stderr",
542
- "output_type": "stream",
543
- "text": [
544
- "\n",
545
- "Extracting entities from chunks: 57%|█████▋ | 20/35 [01:19<00:50, 3.35s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
546
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
547
- ]
548
- },
549
- {
550
- "name": "stdout",
551
- "output_type": "stream",
552
- "text": [
553
- "⠙ Processed 21 chunks, 220 entities(duplicated), 187 relations(duplicated)\r"
554
- ]
555
- },
556
- {
557
- "name": "stderr",
558
- "output_type": "stream",
559
- "text": [
560
- "\n",
561
- "Extracting entities from chunks: 60%|██████ | 21/35 [01:27<01:02, 4.47s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
562
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
563
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
564
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
565
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
566
- ]
567
- },
568
- {
569
- "name": "stdout",
570
- "output_type": "stream",
571
- "text": [
572
- "⠹ Processed 22 chunks, 247 entities(duplicated), 216 relations(duplicated)\r"
573
- ]
574
- },
575
- {
576
- "name": "stderr",
577
- "output_type": "stream",
578
- "text": [
579
- "\n",
580
- "Extracting entities from chunks: 63%|██████▎ | 22/35 [01:30<00:54, 4.16s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
581
- ]
582
- },
583
- {
584
- "name": "stdout",
585
- "output_type": "stream",
586
- "text": [
587
- "⠸ Processed 23 chunks, 260 entities(duplicated), 230 relations(duplicated)\r"
588
- ]
589
- },
590
- {
591
- "name": "stderr",
592
- "output_type": "stream",
593
- "text": [
594
- "\n",
595
- "Extracting entities from chunks: 66%|██████▌ | 23/35 [01:34<00:48, 4.05s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
596
- ]
597
- },
598
- {
599
- "name": "stdout",
600
- "output_type": "stream",
601
- "text": [
602
- "⠼ Processed 24 chunks, 291 entities(duplicated), 253 relations(duplicated)\r"
603
- ]
604
- },
605
- {
606
- "name": "stderr",
607
- "output_type": "stream",
608
- "text": [
609
- "\n",
610
- "Extracting entities from chunks: 69%|██████▊ | 24/35 [01:38<00:44, 4.03s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
611
- ]
612
- },
613
- {
614
- "name": "stdout",
615
- "output_type": "stream",
616
- "text": [
617
- "⠴ Processed 25 chunks, 304 entities(duplicated), 262 relations(duplicated)\r"
618
- ]
619
- },
620
- {
621
- "name": "stderr",
622
- "output_type": "stream",
623
- "text": [
624
- "\n",
625
- "Extracting entities from chunks: 71%|███████▏ | 25/35 [01:41<00:36, 3.67s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
626
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
627
- ]
628
- },
629
- {
630
- "name": "stdout",
631
- "output_type": "stream",
632
- "text": [
633
- "⠦ Processed 26 chunks, 313 entities(duplicated), 271 relations(duplicated)\r"
634
- ]
635
- },
636
- {
637
- "name": "stderr",
638
- "output_type": "stream",
639
- "text": [
640
- "\n",
641
- "Extracting entities from chunks: 74%|███████▍ | 26/35 [01:41<00:24, 2.76s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
642
- ]
643
- },
644
- {
645
- "name": "stdout",
646
- "output_type": "stream",
647
- "text": [
648
- "⠧ Processed 27 chunks, 321 entities(duplicated), 283 relations(duplicated)\r"
649
- ]
650
- },
651
- {
652
- "name": "stderr",
653
- "output_type": "stream",
654
- "text": [
655
- "\n",
656
- "Extracting entities from chunks: 77%|███████▋ | 27/35 [01:47<00:28, 3.52s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
657
- ]
658
- },
659
- {
660
- "name": "stdout",
661
- "output_type": "stream",
662
- "text": [
663
- "⠇ Processed 28 chunks, 333 entities(duplicated), 290 relations(duplicated)\r"
664
- ]
665
- },
666
- {
667
- "name": "stderr",
668
- "output_type": "stream",
669
- "text": [
670
- "\n",
671
- "Extracting entities from chunks: 80%|████████ | 28/35 [01:52<00:28, 4.08s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
672
- ]
673
- },
674
- {
675
- "name": "stdout",
676
- "output_type": "stream",
677
- "text": [
678
- "⠏ Processed 29 chunks, 348 entities(duplicated), 307 relations(duplicated)\r"
679
- ]
680
- },
681
- {
682
- "name": "stderr",
683
- "output_type": "stream",
684
- "text": [
685
- "\n",
686
- "Extracting entities from chunks: 83%|████████▎ | 29/35 [01:59<00:29, 4.88s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
687
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
688
- ]
689
- },
690
- {
691
- "name": "stdout",
692
- "output_type": "stream",
693
- "text": [
694
- "⠋ Processed 30 chunks, 362 entities(duplicated), 329 relations(duplicated)\r"
695
- ]
696
- },
697
- {
698
- "name": "stderr",
699
- "output_type": "stream",
700
- "text": [
701
- "\n",
702
- "Extracting entities from chunks: 86%|████████▌ | 30/35 [02:02<00:21, 4.29s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
703
- ]
704
- },
705
- {
706
- "name": "stdout",
707
- "output_type": "stream",
708
- "text": [
709
- "⠙ Processed 31 chunks, 373 entities(duplicated), 337 relations(duplicated)\r"
710
- ]
711
- },
712
- {
713
- "name": "stderr",
714
- "output_type": "stream",
715
- "text": [
716
- "\n",
717
- "Extracting entities from chunks: 89%|████████▊ | 31/35 [02:03<00:13, 3.28s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
718
- ]
719
- },
720
- {
721
- "name": "stdout",
722
- "output_type": "stream",
723
- "text": [
724
- "⠹ Processed 32 chunks, 390 entities(duplicated), 369 relations(duplicated)\r"
725
- ]
726
- },
727
- {
728
- "name": "stderr",
729
- "output_type": "stream",
730
- "text": [
731
- "\n",
732
- "Extracting entities from chunks: 91%|█████████▏| 32/35 [02:03<00:07, 2.55s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
733
- ]
734
- },
735
- {
736
- "name": "stdout",
737
- "output_type": "stream",
738
- "text": [
739
- "⠸ Processed 33 chunks, 405 entities(duplicated), 378 relations(duplicated)\r"
740
- ]
741
- },
742
- {
743
- "name": "stderr",
744
- "output_type": "stream",
745
- "text": [
746
- "\n",
747
- "Extracting entities from chunks: 94%|█████████▍| 33/35 [02:07<00:05, 2.84s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
748
- ]
749
- },
750
- {
751
- "name": "stdout",
752
- "output_type": "stream",
753
- "text": [
754
- "⠼ Processed 34 chunks, 435 entities(duplicated), 395 relations(duplicated)\r"
755
- ]
756
- },
757
- {
758
- "name": "stderr",
759
- "output_type": "stream",
760
- "text": [
761
- "\n",
762
- "Extracting entities from chunks: 97%|█████████▋| 34/35 [02:10<00:02, 2.94s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
763
- ]
764
- },
765
- {
766
- "name": "stdout",
767
- "output_type": "stream",
768
- "text": [
769
- "⠴ Processed 35 chunks, 456 entities(duplicated), 440 relations(duplicated)\r"
770
- ]
771
- },
772
- {
773
- "name": "stderr",
774
- "output_type": "stream",
775
- "text": [
776
- "\n",
777
- "Extracting entities from chunks: 100%|██████████| 35/35 [02:23<00:00, 4.10s/chunk]\u001b[A\n",
778
- "INFO:lightrag:Inserting entities into storage...\n",
779
- "\n",
780
- "Inserting entities: 100%|██████████| 324/324 [00:00<00:00, 17456.96entity/s]\n",
781
- "INFO:lightrag:Inserting relationships into storage...\n",
782
- "\n",
783
- "Inserting relationships: 100%|██████████| 427/427 [00:00<00:00, 29956.31relationship/s]\n",
784
- "INFO:lightrag:Inserting 324 vectors to entities\n",
785
- "\n",
786
- "Generating embeddings: 0%| | 0/11 [00:00<?, ?batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
787
- "\n",
788
- "Generating embeddings: 9%|▉ | 1/11 [00:00<00:06, 1.48batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
789
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
790
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
791
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
792
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
793
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
794
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
795
- "\n",
796
- "Generating embeddings: 18%|█▊ | 2/11 [00:02<00:11, 1.25s/batch]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
797
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
798
- "\n",
799
- "Generating embeddings: 27%|██▋ | 3/11 [00:02<00:06, 1.17batch/s]\u001b[A\n",
800
- "Generating embeddings: 36%|███▋ | 4/11 [00:03<00:04, 1.50batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
801
- "\n",
802
- "Generating embeddings: 45%|████▌ | 5/11 [00:03<00:03, 1.78batch/s]\u001b[A\n",
803
- "Generating embeddings: 55%|█████▍ | 6/11 [00:03<00:02, 2.01batch/s]\u001b[A\n",
804
- "Generating embeddings: 64%|██████▎ | 7/11 [00:04<00:01, 2.19batch/s]\u001b[A\n",
805
- "Generating embeddings: 73%|███████▎ | 8/11 [00:04<00:01, 2.31batch/s]\u001b[A\n",
806
- "Generating embeddings: 82%|████████▏ | 9/11 [00:04<00:00, 2.41batch/s]\u001b[A\n",
807
- "Generating embeddings: 91%|█████████ | 10/11 [00:05<00:00, 2.48batch/s]\u001b[A\n",
808
- "Generating embeddings: 100%|██████████| 11/11 [00:05<00:00, 1.91batch/s]\u001b[A\n",
809
- "INFO:lightrag:Inserting 427 vectors to relationships\n",
810
- "\n",
811
- "Generating embeddings: 0%| | 0/14 [00:00<?, ?batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
812
- "\n",
813
- "Generating embeddings: 7%|▋ | 1/14 [00:01<00:14, 1.11s/batch]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
814
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
815
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
816
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
817
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
818
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
819
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
820
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
821
- "\n",
822
- "Generating embeddings: 14%|█▍ | 2/14 [00:02<00:14, 1.18s/batch]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
823
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
824
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
825
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
826
- "\n",
827
- "Generating embeddings: 21%|██▏ | 3/14 [00:02<00:08, 1.23batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
828
- "\n",
829
- "Generating embeddings: 29%|██▊ | 4/14 [00:03<00:06, 1.56batch/s]\u001b[A\n",
830
- "Generating embeddings: 36%|███▌ | 5/14 [00:03<00:04, 1.85batch/s]\u001b[A\n",
831
- "Generating embeddings: 43%|████▎ | 6/14 [00:03<00:03, 2.05batch/s]\u001b[A\n",
832
- "Generating embeddings: 50%|█████ | 7/14 [00:04<00:03, 2.23batch/s]\u001b[A\n",
833
- "Generating embeddings: 57%|█████▋ | 8/14 [00:04<00:02, 2.37batch/s]\u001b[A\n",
834
- "Generating embeddings: 64%|██████▍ | 9/14 [00:04<00:02, 2.46batch/s]\u001b[A\n",
835
- "Generating embeddings: 71%|███████▏ | 10/14 [00:05<00:01, 2.54batch/s]\u001b[A\n",
836
- "Generating embeddings: 79%|███████▊ | 11/14 [00:05<00:01, 2.59batch/s]\u001b[A\n",
837
- "Generating embeddings: 86%|████████▌ | 12/14 [00:06<00:00, 2.64batch/s]\u001b[A\n",
838
- "Generating embeddings: 93%|█████████▎| 13/14 [00:06<00:00, 2.65batch/s]\u001b[A\n",
839
- "Generating embeddings: 100%|██████████| 14/14 [00:06<00:00, 2.05batch/s]\u001b[A\n",
840
- "INFO:lightrag:Writing graph with 333 nodes, 427 edges\n",
841
- "Processing batch 1: 100%|██████████| 1/1 [02:45<00:00, 165.90s/it]\n"
842
- ]
843
- }
844
- ],
845
- "source": [
846
- "# rag.insert(content)\n",
847
- "rag.insert(content, split_by_character=\"\\n#\")"
848
- ]
849
- },
850
- {
851
- "cell_type": "code",
852
- "execution_count": 9,
853
- "id": "c4f9ae517151a01d",
854
- "metadata": {
855
- "ExecuteTime": {
856
- "end_time": "2025-01-09T03:45:11.668987Z",
857
- "start_time": "2025-01-09T03:45:11.664744Z"
858
- }
859
- },
860
- "outputs": [],
861
- "source": [
862
- "prompt1 = \"\"\"你是一名经验丰富的论文分析科学家,你的任务是对一篇英文学术研究论文进行关键信息提取并深入分析。\n",
863
- "请按照以下步骤进行分析:\n",
864
- "1. 该文献主要研究的问题是什么?\n",
865
- "2. 该文献采用什么方法进行分析?\n",
866
- "3. 该文献的主要结论是什么?\n",
867
- "首先在<分析>标签中,针对每个问题详细分析你的思考过程。然后在<回答>标签中给出所有问题的最终答案。\"\"\""
868
- ]
869
- },
870
- {
871
- "cell_type": "code",
872
- "execution_count": 10,
873
- "id": "7a6491385b050095",
874
- "metadata": {
875
- "ExecuteTime": {
876
- "end_time": "2025-01-09T03:45:40.829111Z",
877
- "start_time": "2025-01-09T03:45:13.530298Z"
878
- }
879
- },
880
- "outputs": [
881
- {
882
- "name": "stderr",
883
- "output_type": "stream",
884
- "text": [
885
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
886
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
887
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
888
- "INFO:lightrag:Local query uses 5 entites, 12 relations, 3 text units\n",
889
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
890
- "INFO:lightrag:Global query uses 8 entites, 5 relations, 4 text units\n",
891
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
892
- ]
893
- },
894
- {
895
- "name": "stdout",
896
- "output_type": "stream",
897
- "text": [
898
- "<分析>\n",
899
- "1. **该文献主要研究的问题是什么?**\n",
900
- " - 思考过程:通过浏览论文内容,查找作者明确阐述研究目的的部分。文中多处提及“Our study was performed to explore whether folic acid treatment was associated with cancer outcomes and all-cause mortality after extended follow-up”,表明作者旨在探究叶酸治疗与癌症结局及全因死亡率之间的关系,尤其是在经过长期随访后。\n",
901
- "2. **该文献采用什么方法进行分析?**\n",
902
- " - 思考过程:寻找描述研究方法和数据分析过程的段落。文中提到“Survival curves were constructed using the Kaplan-Meier method and differences in survival between groups were analyzed using the log-rank test. Estimates of hazard ratios (HRs) with 95% CIs were obtained by using Cox proportional hazards regression models stratified by trial”,可以看出作者使用了Kaplan-Meier法构建生存曲线、log-rank检验分析组间生存差异以及Cox比例风险回归模型估计风险比等方法。\n",
903
- "3. **该文献的主要结论是什么?**\n",
904
- " - 思考过程:定位到论文中总结结论的部分,如“Conclusion Treatment with folic acid plus vitamin $\\mathsf{B}_{12}$ was associated with increased cancer outcomes and all-cause mortality in patients with ischemic heart disease in Norway, where there is no folic acid fortification of foods”,可知作者得出叶酸加维生素$\\mathsf{B}_{12}$治疗与癌症结局和全因死亡率增加有关的结论。\n",
905
- "<回答>\n",
906
- "1. 该文献主要研究的问题是:叶酸治疗与癌症结局及全因死亡率之间的关系,尤其是在经过长期随访后,叶酸治疗是否与癌症结局和全因死亡率相关。\n",
907
- "2. 该文献采用的分析方法包括:使用Kaplan-Meier法构建生存曲线、log-rank检验分析组间生存差异、Cox比例风险回归模型估计风险比等。\n",
908
- "3. 该文献的主要结论是:在挪威没有叶酸强化食品的情况下,叶酸加维生素$\\mathsf{B}_{12}$治疗与缺血性心脏病患者的癌症结局和全因死亡率增加有关。\n",
909
- "\n",
910
- "**参考文献**\n",
911
- "- [VD] In2Norwegianhomocysteine-lowering trialsamongpatientswithischemicheart disease, there was a statistically nonsignificantincreaseincancerincidenceinthe groupsassignedtofolicacidtreatment.15,16 Our study was performed to explore whetherfolicacidtreatmentwasassociatedwithcanceroutcomesandall-cause mortality after extended follow-up.\n",
912
- "- [VD] Survivalcurveswereconstructedusing theKaplan-Meiermethodanddifferences insurvivalbetweengroupswereanalyzed usingthelog-ranktest.Estimatesofhazard ratios (HRs) with $95\\%$ CIs were obtainedbyusingCoxproportionalhazards regressionmodelsstratifiedbytrial.\n",
913
- "- [VD] Conclusion Treatment with folic acid plus vitamin $\\mathsf{B}_{12}$ was associated with increased cancer outcomes and all-cause mortality in patients with ischemic heart disease in Norway, where there is no folic acid fortification of foods.\n"
914
- ]
915
- }
916
- ],
917
- "source": [
918
- "resp = rag.query(prompt1, param=QueryParam(mode=\"mix\", top_k=5))\n",
919
- "print(resp)"
920
- ]
921
- },
922
- {
923
- "cell_type": "markdown",
924
- "id": "4e5bfad24cb721a8",
925
- "metadata": {},
926
- "source": [
927
- "#### split by character only"
928
- ]
929
- },
930
- {
931
- "cell_type": "code",
932
- "execution_count": 11,
933
- "id": "44e2992dc95f8ce0",
934
- "metadata": {
935
- "ExecuteTime": {
936
- "end_time": "2025-01-09T03:47:40.988796Z",
937
- "start_time": "2025-01-09T03:47:40.982648Z"
938
- }
939
- },
940
- "outputs": [],
941
- "source": [
942
- "WORKING_DIR = \"../../llm_rag/paper_db/R000088_test2\"\n",
943
- "if not os.path.exists(WORKING_DIR):\n",
944
- " os.mkdir(WORKING_DIR)"
945
- ]
946
- },
947
- {
948
- "cell_type": "code",
949
- "execution_count": 12,
950
- "id": "62c63385d2d973d5",
951
- "metadata": {
952
- "ExecuteTime": {
953
- "end_time": "2025-01-09T03:51:39.951329Z",
954
- "start_time": "2025-01-09T03:49:15.218976Z"
955
- }
956
- },
957
- "outputs": [
958
- {
959
- "name": "stderr",
960
- "output_type": "stream",
961
- "text": [
962
- "INFO:lightrag:Logger initialized for working directory: ../../llm_rag/paper_db/R000088_test2\n",
963
- "INFO:lightrag:Load KV llm_response_cache with 0 data\n",
964
- "INFO:lightrag:Load KV full_docs with 0 data\n",
965
- "INFO:lightrag:Load KV text_chunks with 0 data\n",
966
- "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test2/vdb_entities.json'} 0 data\n",
967
- "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test2/vdb_relationships.json'} 0 data\n",
968
- "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test2/vdb_chunks.json'} 0 data\n",
969
- "INFO:lightrag:Loaded document status storage with 0 records\n",
970
- "INFO:lightrag:Processing 1 new unique documents\n",
971
- "Processing batch 1: 0%| | 0/1 [00:00<?, ?it/s]INFO:lightrag:Inserting 12 vectors to chunks\n",
972
- "\n",
973
- "Generating embeddings: 0%| | 0/1 [00:00<?, ?batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
974
- "\n",
975
- "Generating embeddings: 100%|██████████| 1/1 [00:02<00:00, 2.95s/batch]\u001b[A\n",
976
- "\n",
977
- "Extracting entities from chunks: 0%| | 0/12 [00:00<?, ?chunk/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
978
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
979
- ]
980
- },
981
- {
982
- "name": "stdout",
983
- "output_type": "stream",
984
- "text": [
985
- "⠙ Processed 1 chunks, 0 entities(duplicated), 0 relations(duplicated)\r"
986
- ]
987
- },
988
- {
989
- "name": "stderr",
990
- "output_type": "stream",
991
- "text": [
992
- "\n",
993
- "Extracting entities from chunks: 8%|▊ | 1/12 [00:03<00:43, 3.93s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
994
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
995
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
996
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
997
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
998
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
999
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1000
- ]
1001
- },
1002
- {
1003
- "name": "stdout",
1004
- "output_type": "stream",
1005
- "text": [
1006
- "⠹ Processed 2 chunks, 8 entities(duplicated), 8 relations(duplicated)\r"
1007
- ]
1008
- },
1009
- {
1010
- "name": "stderr",
1011
- "output_type": "stream",
1012
- "text": [
1013
- "\n",
1014
- "Extracting entities from chunks: 17%|█▋ | 2/12 [00:29<02:44, 16.46s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1015
- ]
1016
- },
1017
- {
1018
- "name": "stdout",
1019
- "output_type": "stream",
1020
- "text": [
1021
- "⠸ Processed 3 chunks, 17 entities(duplicated), 15 relations(duplicated)\r"
1022
- ]
1023
- },
1024
- {
1025
- "name": "stderr",
1026
- "output_type": "stream",
1027
- "text": [
1028
- "\n",
1029
- "Extracting entities from chunks: 25%|██▌ | 3/12 [00:30<01:25, 9.45s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
1030
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1031
- ]
1032
- },
1033
- {
1034
- "name": "stdout",
1035
- "output_type": "stream",
1036
- "text": [
1037
- "⠼ Processed 4 chunks, 27 entities(duplicated), 22 relations(duplicated)\r"
1038
- ]
1039
- },
1040
- {
1041
- "name": "stderr",
1042
- "output_type": "stream",
1043
- "text": [
1044
- "\n",
1045
- "Extracting entities from chunks: 33%|███▎ | 4/12 [00:39<01:16, 9.52s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1046
- ]
1047
- },
1048
- {
1049
- "name": "stdout",
1050
- "output_type": "stream",
1051
- "text": [
1052
- "⠴ Processed 5 chunks, 36 entities(duplicated), 33 relations(duplicated)\r"
1053
- ]
1054
- },
1055
- {
1056
- "name": "stderr",
1057
- "output_type": "stream",
1058
- "text": [
1059
- "\n",
1060
- "Extracting entities from chunks: 42%|████▏ | 5/12 [00:40<00:43, 6.24s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
1061
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
1062
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1063
- ]
1064
- },
1065
- {
1066
- "name": "stdout",
1067
- "output_type": "stream",
1068
- "text": [
1069
- "⠦ Processed 6 chunks, 49 entities(duplicated), 42 relations(duplicated)\r"
1070
- ]
1071
- },
1072
- {
1073
- "name": "stderr",
1074
- "output_type": "stream",
1075
- "text": [
1076
- "\n",
1077
- "Extracting entities from chunks: 50%|█████ | 6/12 [00:49<00:43, 7.33s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1078
- ]
1079
- },
1080
- {
1081
- "name": "stdout",
1082
- "output_type": "stream",
1083
- "text": [
1084
- "⠧ Processed 7 chunks, 62 entities(duplicated), 65 relations(duplicated)\r"
1085
- ]
1086
- },
1087
- {
1088
- "name": "stderr",
1089
- "output_type": "stream",
1090
- "text": [
1091
- "\n",
1092
- "Extracting entities from chunks: 58%|█████▊ | 7/12 [01:05<00:50, 10.05s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
1093
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1094
- ]
1095
- },
1096
- {
1097
- "name": "stdout",
1098
- "output_type": "stream",
1099
- "text": [
1100
- "⠇ Processed 8 chunks, 81 entities(duplicated), 90 relations(duplicated)\r"
1101
- ]
1102
- },
1103
- {
1104
- "name": "stderr",
1105
- "output_type": "stream",
1106
- "text": [
1107
- "\n",
1108
- "Extracting entities from chunks: 67%|██████▋ | 8/12 [01:23<00:50, 12.69s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1109
- ]
1110
- },
1111
- {
1112
- "name": "stdout",
1113
- "output_type": "stream",
1114
- "text": [
1115
- "⠏ Processed 9 chunks, 99 entities(duplicated), 117 relations(duplicated)\r"
1116
- ]
1117
- },
1118
- {
1119
- "name": "stderr",
1120
- "output_type": "stream",
1121
- "text": [
1122
- "\n",
1123
- "Extracting entities from chunks: 75%|███████▌ | 9/12 [01:32<00:34, 11.54s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
1124
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1125
- ]
1126
- },
1127
- {
1128
- "name": "stdout",
1129
- "output_type": "stream",
1130
- "text": [
1131
- "⠋ Processed 10 chunks, 123 entities(duplicated), 140 relations(duplicated)\r"
1132
- ]
1133
- },
1134
- {
1135
- "name": "stderr",
1136
- "output_type": "stream",
1137
- "text": [
1138
- "\n",
1139
- "Extracting entities from chunks: 83%|████████▎ | 10/12 [01:48<00:25, 12.79s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1140
- ]
1141
- },
1142
- {
1143
- "name": "stdout",
1144
- "output_type": "stream",
1145
- "text": [
1146
- "⠙ Processed 11 chunks, 158 entities(duplicated), 174 relations(duplicated)\r"
1147
- ]
1148
- },
1149
- {
1150
- "name": "stderr",
1151
- "output_type": "stream",
1152
- "text": [
1153
- "\n",
1154
- "Extracting entities from chunks: 92%|█████████▏| 11/12 [02:03<00:13, 13.50s/chunk]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1155
- ]
1156
- },
1157
- {
1158
- "name": "stdout",
1159
- "output_type": "stream",
1160
- "text": [
1161
- "⠹ Processed 12 chunks, 194 entities(duplicated), 221 relations(duplicated)\r"
1162
- ]
1163
- },
1164
- {
1165
- "name": "stderr",
1166
- "output_type": "stream",
1167
- "text": [
1168
- "\n",
1169
- "Extracting entities from chunks: 100%|██████████| 12/12 [02:13<00:00, 11.15s/chunk]\u001b[A\n",
1170
- "INFO:lightrag:Inserting entities into storage...\n",
1171
- "\n",
1172
- "Inserting entities: 100%|██████████| 170/170 [00:00<00:00, 11610.25entity/s]\n",
1173
- "INFO:lightrag:Inserting relationships into storage...\n",
1174
- "\n",
1175
- "Inserting relationships: 100%|██████████| 218/218 [00:00<00:00, 15913.51relationship/s]\n",
1176
- "INFO:lightrag:Inserting 170 vectors to entities\n",
1177
- "\n",
1178
- "Generating embeddings: 0%| | 0/6 [00:00<?, ?batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1179
- "\n",
1180
- "Generating embeddings: 17%|█▋ | 1/6 [00:01<00:05, 1.10s/batch]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1181
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1182
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1183
- "\n",
1184
- "Generating embeddings: 33%|███▎ | 2/6 [00:02<00:04, 1.07s/batch]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1185
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1186
- "\n",
1187
- "Generating embeddings: 50%|█████ | 3/6 [00:02<00:02, 1.33batch/s]\u001b[A\n",
1188
- "Generating embeddings: 67%|██████▋ | 4/6 [00:02<00:01, 1.67batch/s]\u001b[A\n",
1189
- "Generating embeddings: 83%|████████▎ | 5/6 [00:03<00:00, 1.95batch/s]\u001b[A\n",
1190
- "Generating embeddings: 100%|██████████| 6/6 [00:03<00:00, 1.66batch/s]\u001b[A\n",
1191
- "INFO:lightrag:Inserting 218 vectors to relationships\n",
1192
- "\n",
1193
- "Generating embeddings: 0%| | 0/7 [00:00<?, ?batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1194
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1195
- "\n",
1196
- "Generating embeddings: 14%|█▍ | 1/7 [00:01<00:10, 1.74s/batch]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1197
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1198
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1199
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1200
- "\n",
1201
- "Generating embeddings: 29%|██▊ | 2/7 [00:02<00:05, 1.04s/batch]\u001b[A\n",
1202
- "Generating embeddings: 43%|████▎ | 3/7 [00:02<00:02, 1.35batch/s]\u001b[A\n",
1203
- "Generating embeddings: 57%|█████▋ | 4/7 [00:03<00:01, 1.69batch/s]\u001b[AINFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1204
- "\n",
1205
- "Generating embeddings: 71%|███████▏ | 5/7 [00:03<00:01, 1.96batch/s]\u001b[A\n",
1206
- "Generating embeddings: 86%|████████▌ | 6/7 [00:03<00:00, 2.17batch/s]\u001b[A\n",
1207
- "Generating embeddings: 100%|██████████| 7/7 [00:04<00:00, 1.68batch/s]\u001b[A\n",
1208
- "INFO:lightrag:Writing graph with 174 nodes, 218 edges\n",
1209
- "Processing batch 1: 100%|██████████| 1/1 [02:24<00:00, 144.69s/it]\n"
1210
- ]
1211
- }
1212
- ],
1213
- "source": [
1214
- "rag = LightRAG(\n",
1215
- " working_dir=WORKING_DIR,\n",
1216
- " llm_model_func=llm_model_func,\n",
1217
- " embedding_func=EmbeddingFunc(\n",
1218
- " embedding_dim=4096, max_token_size=8192, func=embedding_func\n",
1219
- " ),\n",
1220
- " chunk_token_size=512,\n",
1221
- ")\n",
1222
- "\n",
1223
- "# rag.insert(content)\n",
1224
- "rag.insert(content, split_by_character=\"\\n#\", split_by_character_only=True)"
1225
- ]
1226
- },
1227
- {
1228
- "cell_type": "code",
1229
- "execution_count": 13,
1230
- "id": "3c7aa9836d8d43c7",
1231
- "metadata": {
1232
- "ExecuteTime": {
1233
- "end_time": "2025-01-09T03:52:37.000418Z",
1234
- "start_time": "2025-01-09T03:52:09.933584Z"
1235
- }
1236
- },
1237
- "outputs": [
1238
- {
1239
- "name": "stderr",
1240
- "output_type": "stream",
1241
- "text": [
1242
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1243
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
1244
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1245
- "INFO:lightrag:Local query uses 5 entites, 3 relations, 2 text units\n",
1246
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
1247
- "INFO:lightrag:Global query uses 9 entites, 5 relations, 4 text units\n",
1248
- "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
1249
- ]
1250
- },
1251
- {
1252
- "name": "stdout",
1253
- "output_type": "stream",
1254
- "text": [
1255
- "<分析>\n",
1256
- "- **该文献主要研究的问题是什么?**\n",
1257
- " - **思考过程**:通过浏览论文的标题、摘要、引言等部分,寻找关于研究目的和问题的描述。论文标题为“Cancer Incidence and Mortality After Treatment With Folic Acid and Vitamin B12”,摘要中的“Objective”部分明确指出研究目的是“To evaluate effects of treatment with B vitamins on cancer outcomes and all-cause mortality in 2 randomized controlled trials”。因此,可以确定该文献主要研究的问题是评估B族维生素治疗对两项随机对照试验中癌症结局和全因死亡率的影响。\n",
1258
- "- **该文献采用什么方法进行分析?**\n",
1259
- " - **思考过程**:在论文的“METHODS”部分详细描述了研究方法。文中提到这是一个对两项随机、双盲、安慰剂对照临床试验(Norwegian Vitamin [NORVIT] trial和Western Norway B Vitamin Intervention Trial [WENBIT])数据的联合分析,并进行了观察性的试验后随访。具体包括对参与者进行分组干预(不同剂量的叶酸、维生素B12、维生素B6或安慰剂),收集临床信息和血样,分析循环B族维生素、同型半胱氨酸和可替宁等指标,并进行基因分型等,还涉及到多种统计分析方法,如计算预期癌症发生率、构建生存曲线、进行Cox比例风险回归模型分析等。\n",
1260
- "- **该文献的主要结论是什么?**\n",
1261
- " - **思考过程**:在论文的“Results”和“Conclusion”部分寻找主要结论。研究结果表明,在治疗期间,接受叶酸加维生素B12治疗的参与者血清叶酸浓度显著增加,且在后续随访中,该组癌症发病率、癌症死亡率和全因死亡率均有所上升,主要是肺癌发病率增加,而维生素B6治疗未显示出显著影响。结论部分明确指出“Treatment with folic acid plus vitamin $\\mathsf{B}_{12}$ was associated with increased cancer outcomes and all-cause mortality in patients with ischemic heart disease in Norway, where there is no folic acid fortification of foods”。\n",
1262
- "</分析>\n",
1263
- "\n",
1264
- "<回答>\n",
1265
- "- **主要研究问题**:评估B族维生素治疗对两项随机对照试验中癌症结局和全因死亡率的影响。\n",
1266
- "- **研究方法**:采用对两项随机、双盲、安慰剂对照临床试验(Norwegian Vitamin [NORVIT] trial和Western Norway B Vitamin Intervention Trial [WENBIT])数据的联合分析,并进行观察性的试验后随访,涉及分组干预、多种指标检测以及多种统计分析方法。\n",
1267
- "- **主要结论**:在挪威(食品中未添加叶酸),对于缺血性心脏病患者,叶酸加维生素B12治疗与癌症结局和全因死亡率的增加有关,而维生素B6治疗未显示出显著影响。\n",
1268
- "\n",
1269
- "**参考文献**\n",
1270
- "- [VD] Cancer Incidence and Mortality After Treatment With Folic Acid and Vitamin B12\n",
1271
- "- [VD] METHODS Study Design, Participants, and Study Intervention\n",
1272
- "- [VD] RESULTS\n",
1273
- "- [VD] Conclusion\n",
1274
- "- [VD] Objective To evaluate effects of treatment with B vitamins on cancer outcomes and all-cause mortality in 2 randomized controlled trials.\n"
1275
- ]
1276
- }
1277
- ],
1278
- "source": [
1279
- "resp = rag.query(prompt1, param=QueryParam(mode=\"mix\", top_k=5))\n",
1280
- "print(resp)"
1281
- ]
1282
- },
1283
- {
1284
- "cell_type": "code",
1285
- "execution_count": null,
1286
- "id": "7ba6fa79a2550d10",
1287
- "metadata": {},
1288
- "outputs": [],
1289
- "source": []
1290
- }
1291
- ],
1292
- "metadata": {
1293
- "kernelspec": {
1294
- "display_name": "Python 3",
1295
- "language": "python",
1296
- "name": "python3"
1297
- },
1298
- "language_info": {
1299
- "codemirror_mode": {
1300
- "name": "ipython",
1301
- "version": 2
1302
- },
1303
- "file_extension": ".py",
1304
- "mimetype": "text/x-python",
1305
- "name": "python",
1306
- "nbconvert_exporter": "python",
1307
- "pygments_lexer": "ipython2",
1308
- "version": "2.7.6"
1309
- }
1310
- },
1311
- "nbformat": 4,
1312
- "nbformat_minor": 5
1313
- }
 
examples/vram_management_demo.py CHANGED
@@ -14,6 +14,7 @@ TEXT_FILES_DIR = "/llm/mt"
14
  if not os.path.exists(WORKING_DIR):
15
  os.mkdir(WORKING_DIR)
16
 
 
17
  async def initialize_rag():
18
  # Initialize LightRAG
19
  rag = LightRAG(
@@ -31,6 +32,7 @@ async def initialize_rag():
31
 
32
  return rag
33
 
 
34
  # Read all .txt files from the TEXT_FILES_DIR directory
35
  texts = []
36
  for filename in os.listdir(TEXT_FILES_DIR):
@@ -82,7 +84,8 @@ def main():
82
  try:
83
  print(
84
  rag.query(
85
- "What are the top themes in this story?", param=QueryParam(mode="global")
 
86
  )
87
  )
88
  except Exception as e:
@@ -91,18 +94,17 @@ def main():
91
  try:
92
  print(
93
  rag.query(
94
- "What are the top themes in this story?", param=QueryParam(mode="hybrid")
 
95
  )
96
  )
97
  except Exception as e:
98
  print(f"Error performing hybrid search: {e}")
99
 
100
-
101
  # Function to clear VRAM resources
102
  def clear_vram():
103
  os.system("sudo nvidia-smi --gpu-reset")
104
 
105
-
106
  # Regularly clear VRAM to prevent overflow
107
  clear_vram_interval = 3600 # Clear once every hour
108
  start_time = time.time()
@@ -114,5 +116,6 @@ def main():
114
  start_time = current_time
115
  time.sleep(60) # Check the time every minute
116
 
 
117
  if __name__ == "__main__":
118
  main()
 
14
  if not os.path.exists(WORKING_DIR):
15
  os.mkdir(WORKING_DIR)
16
 
17
+
18
  async def initialize_rag():
19
  # Initialize LightRAG
20
  rag = LightRAG(
 
32
 
33
  return rag
34
 
35
+
36
  # Read all .txt files from the TEXT_FILES_DIR directory
37
  texts = []
38
  for filename in os.listdir(TEXT_FILES_DIR):
 
84
  try:
85
  print(
86
  rag.query(
87
+ "What are the top themes in this story?",
88
+ param=QueryParam(mode="global"),
89
  )
90
  )
91
  except Exception as e:
 
94
  try:
95
  print(
96
  rag.query(
97
+ "What are the top themes in this story?",
98
+ param=QueryParam(mode="hybrid"),
99
  )
100
  )
101
  except Exception as e:
102
  print(f"Error performing hybrid search: {e}")
103
 
 
104
  # Function to clear VRAM resources
105
  def clear_vram():
106
  os.system("sudo nvidia-smi --gpu-reset")
107
 
 
108
  # Regularly clear VRAM to prevent overflow
109
  clear_vram_interval = 3600 # Clear once every hour
110
  start_time = time.time()
 
116
  start_time = current_time
117
  time.sleep(60) # Check the time every minute
118
 
119
+
120
  if __name__ == "__main__":
121
  main()
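
The query-call reflow above is the usual fix for an overlong line, presumably flagged by the project's line-length check: each argument moves onto its own line with a trailing comma, which also keeps a black/ruff-style formatter from collapsing the call again. Applied to the global-mode call (the hybrid call is identical apart from the mode), the resulting shape is:

print(
    rag.query(
        "What are the top themes in this story?",
        param=QueryParam(mode="global"),
    )
)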
reproduce/Step_1.py CHANGED
@@ -31,6 +31,7 @@ WORKING_DIR = f"../{cls}"
31
  if not os.path.exists(WORKING_DIR):
32
  os.mkdir(WORKING_DIR)
33
 
 
34
  async def initialize_rag():
35
  rag = LightRAG(working_dir=WORKING_DIR)
36
 
@@ -39,6 +40,7 @@ async def initialize_rag():
39
 
40
  return rag
41
 
 
42
  def main():
43
  # Initialize RAG instance
44
  rag = asyncio.run(initialize_rag())
 
31
  if not os.path.exists(WORKING_DIR):
32
  os.mkdir(WORKING_DIR)
33
 
34
+
35
  async def initialize_rag():
36
  rag = LightRAG(working_dir=WORKING_DIR)
37
 
 
40
 
41
  return rag
42
 
43
+
44
  def main():
45
  # Initialize RAG instance
46
  rag = asyncio.run(initialize_rag())
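
This hunk and the next make the same whitespace-only fix, presumably for pycodestyle E302/E305: top-level function definitions want two blank lines separating them from surrounding code. A stripped-down sketch of the spacing these diffs converge on (function bodies elided):

import asyncio


# E302: two blank lines before each top-level def.
async def initialize_rag():
    ...


def main():
    rag = asyncio.run(initialize_rag())
    return rag


# E305: two blank lines after the last def before top-level code.
if __name__ == "__main__":
    main()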
reproduce/Step_1_openai_compatible.py CHANGED
@@ -62,6 +62,7 @@ WORKING_DIR = f"../{cls}"
62
  if not os.path.exists(WORKING_DIR):
63
  os.mkdir(WORKING_DIR)
64
 
 
65
  async def initialize_rag():
66
  rag = LightRAG(
67
  working_dir=WORKING_DIR,
@@ -76,6 +77,7 @@ async def initialize_rag():
76
 
77
  return rag
78
 
 
79
  def main():
80
  # Initialize RAG instance
81
  rag = asyncio.run(initialize_rag())
 
62
  if not os.path.exists(WORKING_DIR):
63
  os.mkdir(WORKING_DIR)
64
 
65
+
66
  async def initialize_rag():
67
  rag = LightRAG(
68
  working_dir=WORKING_DIR,
 
77
 
78
  return rag
79
 
80
+
81
  def main():
82
  # Initialize RAG instance
83
  rag = asyncio.run(initialize_rag())