zrguo committed
Commit 96fdb2f · 1 Parent(s): e6b8d67

Update RAGAnything related

README.md CHANGED
@@ -1159,40 +1159,95 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
    pip install raganything
    ```
 2. Process multimodal documents:
- ```python
- import asyncio
- from raganything import RAGAnything
- from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-
- async def main():
-     # Initialize RAGAnything with LightRAG integration
-     rag = RAGAnything(
-         working_dir="./rag_storage",
-         llm_model_func=lambda prompt, **kwargs: openai_complete_if_cache(
-             "gpt-4o-mini", prompt, api_key="your-api-key", **kwargs
-         ),
-         embedding_func=lambda texts: openai_embed(
-             texts, model="text-embedding-3-large", api_key="your-api-key"
-         ),
-         embedding_dim=3072,
-     )
-
-     # Process multimodal documents
-     await rag.process_document_complete(
-         file_path="path/to/your/document.pdf",
-         output_dir="./output"
-     )
-
-     # Query multimodal content
-     result = await rag.query_with_multimodal(
-         "What are the main findings shown in the figures and tables?",
-         mode="hybrid"
-     )
-     print(result)
-
- if __name__ == "__main__":
-     asyncio.run(main())
- ```
+ ```python
+ import asyncio
+ from raganything import RAGAnything
+ from lightrag import LightRAG
+ from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+ from lightrag.utils import EmbeddingFunc
+ import os
+
+ async def load_existing_lightrag():
+     # First, create or load an existing LightRAG instance
+     lightrag_working_dir = "./existing_lightrag_storage"
+
+     # Check whether a previous LightRAG instance exists
+     if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
+         print("✅ Found existing LightRAG instance, loading...")
+     else:
+         print("❌ No existing LightRAG instance found, will create a new one")
+
+     # Create/load the LightRAG instance with your configuration
+     lightrag_instance = LightRAG(
+         working_dir=lightrag_working_dir,
+         llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+             "gpt-4o-mini",
+             prompt,
+             system_prompt=system_prompt,
+             history_messages=history_messages,
+             api_key="your-api-key",
+             **kwargs,
+         ),
+         embedding_func=EmbeddingFunc(
+             embedding_dim=3072,
+             max_token_size=8192,
+             func=lambda texts: openai_embed(
+                 texts,
+                 model="text-embedding-3-large",
+                 api_key="your-api-key",
+             ),
+         ),
+     )
+
+     # Initialize storage (this will load existing data if available)
+     await lightrag_instance.initialize_storages()
+
+     # Now initialize RAGAnything with the existing LightRAG instance
+     rag = RAGAnything(
+         lightrag=lightrag_instance,  # Pass the existing LightRAG instance
+         # Only a vision model is needed for multimodal processing
+         vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+             "gpt-4o",
+             "",
+             system_prompt=None,
+             history_messages=[],
+             messages=[m for m in [
+                 {"role": "system", "content": system_prompt} if system_prompt else None,
+                 {"role": "user", "content": [
+                     {"type": "text", "text": prompt},
+                     {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
+                 ]} if image_data else {"role": "user", "content": prompt},
+             ] if m is not None],
+             api_key="your-api-key",
+             **kwargs,
+         ) if image_data else openai_complete_if_cache(
+             "gpt-4o-mini",
+             prompt,
+             system_prompt=system_prompt,
+             history_messages=history_messages,
+             api_key="your-api-key",
+             **kwargs,
+         ),
+         # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
+     )
+
+     # Query the existing knowledge base
+     result = await rag.query_with_multimodal(
+         "What data has been processed in this LightRAG instance?",
+         mode="hybrid"
+     )
+     print("Query result:", result)
+
+     # Add new multimodal documents to the existing LightRAG instance
+     await rag.process_document_complete(
+         file_path="path/to/new/multimodal_document.pdf",
+         output_dir="./output"
+     )
+
+ if __name__ == "__main__":
+     asyncio.run(load_existing_lightrag())
+ ```
 
 For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
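The new README example routes all multimodal handling through a single inline `vision_model_func` lambda, which is dense to read. The same dispatch logic is easier to follow as a named function; the sketch below is illustrative only (the helper name and the `"your-api-key"` placeholder are not part of the commit), and it assumes, as the committed example does, that `openai_complete_if_cache` accepts an explicit `messages` override when an image is supplied.

```python
# Sketch only: the same image/text dispatch as the README's inline lambda,
# written as a named function. `openai_complete_if_cache` is the real
# LightRAG helper used above; the helper name and "your-api-key" are
# illustrative placeholders.
from lightrag.llm.openai import openai_complete_if_cache

def vision_model_func(prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs):
    if image_data:
        # Multimodal path: send the prompt plus a base64-encoded image
        # as an explicit OpenAI-style messages list.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}},
            ],
        })
        return openai_complete_if_cache(
            "gpt-4o", "", messages=messages, api_key="your-api-key", **kwargs
        )
    # Text-only path: fall back to the cheaper text model.
    return openai_complete_if_cache(
        "gpt-4o-mini",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key="your-api-key",
        **kwargs,
    )
```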
examples/modalprocessors_example.py CHANGED
@@ -9,6 +9,7 @@ import argparse
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from lightrag import LightRAG
+from lightrag.utils import EmbeddingFunc
 from raganything.modalprocessors import (
     ImageModalProcessor,
     TableModalProcessor,
@@ -165,11 +166,15 @@ async def process_equation_example(lightrag: LightRAG, llm_model_func):
 async def initialize_rag(api_key: str, base_url: str = None):
     rag = LightRAG(
         working_dir=WORKING_DIR,
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key=api_key,
-            base_url=base_url,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
         ),
         llm_model_func=lambda prompt,
         system_prompt=None,
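
The common thread across all three files is that a bare embedding lambda plus loose `embedding_dim`/`embedding_dim`-style keyword arguments is replaced by a single `EmbeddingFunc` object from `lightrag.utils`. A minimal sketch of the wrapper in isolation, assuming the same OpenAI embedding backend used throughout this commit (`"your-api-key"` is a placeholder):

```python
# Minimal sketch of the EmbeddingFunc pattern introduced in this commit.
# embedding_dim must match the chosen model (3072 for text-embedding-3-large);
# "your-api-key" is a placeholder.
from lightrag.utils import EmbeddingFunc
from lightrag.llm.openai import openai_embed

embedding_func = EmbeddingFunc(
    embedding_dim=3072,   # output dimension of text-embedding-3-large
    max_token_size=8192,  # max tokens per embedding request
    func=lambda texts: openai_embed(
        texts,
        model="text-embedding-3-large",
        api_key="your-api-key",
    ),
)
```

Bundling the dimension and token limit with the callable presumably lets LightRAG size its vector storage from the function object itself, instead of each constructor taking them as separate keywords.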
examples/raganything_example.py CHANGED
@@ -12,6 +12,7 @@ import os
 import argparse
 import asyncio
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
 from raganything.raganything import RAGAnything


@@ -89,14 +90,16 @@ async def process_with_rag(
             base_url=base_url,
             **kwargs,
         ),
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key=api_key,
-            base_url=base_url,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
         ),
-        embedding_dim=3072,
-        max_token_size=8192,
     )

     # Process document
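
For reference, a minimal end-to-end sketch combining the updated pieces, modeled on the pre-change README snippet and the post-change `EmbeddingFunc` style; the file paths, the API key, and the direct `working_dir` constructor argument are assumptions for illustration, not code from this commit:

```python
# End-to-end sketch (assumptions: direct RAGAnything constructor arguments
# as in raganything_example.py; "your-api-key" and the paths are placeholders).
import asyncio
from raganything import RAGAnything
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc

async def main():
    rag = RAGAnything(
        working_dir="./rag_storage",  # assumed, as in the pre-change README
        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key="your-api-key",
            **kwargs,
        ),
        embedding_func=EmbeddingFunc(
            embedding_dim=3072,
            max_token_size=8192,
            func=lambda texts: openai_embed(
                texts, model="text-embedding-3-large", api_key="your-api-key"
            ),
        ),
    )

    # Parse the document and index its text and multimodal content
    await rag.process_document_complete(
        file_path="path/to/your/document.pdf", output_dir="./output"
    )

    # Ask a question that spans text, figures, and tables
    result = await rag.query_with_multimodal(
        "What are the main findings shown in the figures and tables?",
        mode="hybrid",
    )
    print(result)

if __name__ == "__main__":
    asyncio.run(main())
```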