zrguo
committed on
Commit
·
96fdb2f
1
Parent(s):
e6b8d67
Update RAGAnything related
Browse files
- README.md +89 -34
- examples/modalprocessors_example.py +10 -5
- examples/raganything_example.py +10 -7
README.md
CHANGED
@@ -1159,40 +1159,95 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
|
1159 |
pip install raganything
|
1160 |
```
|
1161 |
2. Process multimodal documents:
|
1162 |
-
|
1163 |
-
|
1164 |
-
|
1165 |
-
|
1166 |
-
|
1167 |
-
|
1168 |
-
|
1169 |
-
|
1170 |
-
|
1171 |
-
|
1172 |
-
|
1173 |
-
|
1174 |
-
|
1175 |
-
|
1176 |
-
|
1177 |
-
|
1178 |
-
|
1179 |
-
|
1180 |
-
|
1181 |
-
|
1182 |
-
|
1183 |
-
|
1184 |
-
|
1185 |
-
|
1186 |
-
|
1187 |
-
|
1188 |
-
|
1189 |
-
|
1190 |
-
|
1191 |
-
|
1192 |
-
|
1193 |
-
|
1194 |
-
|
1195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1196 |
|
1197 |
For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
|
1198 |
|
|
|
1159 |
pip install raganything
|
1160 |
```
|
1161 |
2. Process multimodal documents:
|
1162 |
+
```python
|
1163 |
+
import asyncio
|
1164 |
+
from raganything import RAGAnything
|
1165 |
+
from lightrag import LightRAG
|
1166 |
+
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
1167 |
+
from lightrag.utils import EmbeddingFunc
|
1168 |
+
import os
|
1169 |
+
|
1170 |
+
async def load_existing_lightrag():
    """Attach RAGAnything to an existing (or fresh) LightRAG store and use it.

    Steps performed:
      1. Report whether ``./existing_lightrag_storage`` already holds data.
      2. Build a ``LightRAG`` instance on that directory and initialize its
         storages.
      3. Wrap the instance in ``RAGAnything``, supplying only a vision model
         function.
      4. Run one hybrid multimodal query and print the result.
      5. Process one additional document into the same instance.

    Returns:
        None. Results are printed; documents are written through ``rag``.
    """
    # Credentials/endpoint shared by every OpenAI call below. The original
    # snippet referenced `api_key` / `base_url` in the embedding function
    # without ever defining them, which raised NameError on first embed.
    api_key = "your-api-key"
    base_url = None  # set to an OpenAI-compatible endpoint URL if needed

    # First, create or load an existing LightRAG instance
    lightrag_working_dir = "./existing_lightrag_storage"

    # Check if previous LightRAG instance exists
    if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
        print("✅ Found existing LightRAG instance, loading...")
    else:
        print("❌ No existing LightRAG instance found, will create new one")

    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        """Text-only completion via gpt-4o-mini; returns the awaitable from the client."""
        return openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            # None default avoids sharing one mutable list across calls.
            history_messages=history_messages or [],
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )

    def embed_texts(texts):
        """Embed `texts` with text-embedding-3-large."""
        return openai_embed(
            texts,
            model="text-embedding-3-large",
            api_key=api_key,
            base_url=base_url,
        )

    def vision_model_func(
        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
    ):
        """Use gpt-4o when an image is supplied, else fall back to the text model."""
        if not image_data:
            return llm_model_func(
                prompt,
                system_prompt=system_prompt,
                history_messages=history_messages,
                **kwargs,
            )

        # Build the message list without a placeholder None entry when there
        # is no system prompt; a None element is not a valid chat message.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
                    },
                ],
            }
        )
        # NOTE(review): relies on openai_complete_if_cache accepting a
        # `messages` override, as the original snippet did - confirm against
        # the installed LightRAG version.
        return openai_complete_if_cache(
            "gpt-4o",
            "",
            system_prompt=None,
            history_messages=[],
            messages=messages,
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )

    # Create/Load LightRAG instance with your configurations
    lightrag_instance = LightRAG(
        working_dir=lightrag_working_dir,
        llm_model_func=llm_model_func,
        embedding_func=EmbeddingFunc(
            embedding_dim=3072,
            max_token_size=8192,
            func=embed_texts,
        ),
    )

    # Initialize storage (this will load existing data if available)
    await lightrag_instance.initialize_storages()

    # Now initialize RAGAnything with the existing LightRAG instance
    rag = RAGAnything(
        lightrag=lightrag_instance,  # Pass the existing LightRAG instance
        # Only need vision model for multimodal processing
        vision_model_func=vision_model_func,
        # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
    )

    # Query the existing knowledge base
    result = await rag.query_with_multimodal(
        "What data has been processed in this LightRAG instance?",
        mode="hybrid",
    )
    print("Query result:", result)

    # Add new multimodal documents to the existing LightRAG instance
    await rag.process_document_complete(
        file_path="path/to/new/multimodal_document.pdf",
        output_dir="./output",
    )
|
1247 |
+
|
1248 |
+
if __name__ == "__main__":
|
1249 |
+
asyncio.run(load_existing_lightrag())
|
1250 |
+
```
|
1251 |
|
1252 |
For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
|
1253 |
|
examples/modalprocessors_example.py
CHANGED
@@ -9,6 +9,7 @@ import argparse
|
|
9 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
10 |
from lightrag.kg.shared_storage import initialize_pipeline_status
|
11 |
from lightrag import LightRAG
|
|
|
12 |
from raganything.modalprocessors import (
|
13 |
ImageModalProcessor,
|
14 |
TableModalProcessor,
|
@@ -165,11 +166,15 @@ async def process_equation_example(lightrag: LightRAG, llm_model_func):
|
|
165 |
async def initialize_rag(api_key: str, base_url: str = None):
|
166 |
rag = LightRAG(
|
167 |
working_dir=WORKING_DIR,
|
168 |
-
embedding_func=
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
|
|
|
|
|
|
173 |
),
|
174 |
llm_model_func=lambda prompt,
|
175 |
system_prompt=None,
|
|
|
9 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
10 |
from lightrag.kg.shared_storage import initialize_pipeline_status
|
11 |
from lightrag import LightRAG
|
12 |
+
from lightrag.utils import EmbeddingFunc
|
13 |
from raganything.modalprocessors import (
|
14 |
ImageModalProcessor,
|
15 |
TableModalProcessor,
|
|
|
166 |
async def initialize_rag(api_key: str, base_url: str = None):
|
167 |
rag = LightRAG(
|
168 |
working_dir=WORKING_DIR,
|
169 |
+
embedding_func=EmbeddingFunc(
|
170 |
+
embedding_dim=3072,
|
171 |
+
max_token_size=8192,
|
172 |
+
func=lambda texts: openai_embed(
|
173 |
+
texts,
|
174 |
+
model="text-embedding-3-large",
|
175 |
+
api_key=api_key,
|
176 |
+
base_url=base_url,
|
177 |
+
),
|
178 |
),
|
179 |
llm_model_func=lambda prompt,
|
180 |
system_prompt=None,
|
examples/raganything_example.py
CHANGED
@@ -12,6 +12,7 @@ import os
|
|
12 |
import argparse
|
13 |
import asyncio
|
14 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
|
|
15 |
from raganything.raganything import RAGAnything
|
16 |
|
17 |
|
@@ -89,14 +90,16 @@ async def process_with_rag(
|
|
89 |
base_url=base_url,
|
90 |
**kwargs,
|
91 |
),
|
92 |
-
embedding_func=
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
97 |
),
|
98 |
-
embedding_dim=3072,
|
99 |
-
max_token_size=8192,
|
100 |
)
|
101 |
|
102 |
# Process document
|
|
|
12 |
import argparse
|
13 |
import asyncio
|
14 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
15 |
+
from lightrag.utils import EmbeddingFunc
|
16 |
from raganything.raganything import RAGAnything
|
17 |
|
18 |
|
|
|
90 |
base_url=base_url,
|
91 |
**kwargs,
|
92 |
),
|
93 |
+
embedding_func=EmbeddingFunc(
|
94 |
+
embedding_dim=3072,
|
95 |
+
max_token_size=8192,
|
96 |
+
func=lambda texts: openai_embed(
|
97 |
+
texts,
|
98 |
+
model="text-embedding-3-large",
|
99 |
+
api_key=api_key,
|
100 |
+
base_url=base_url,
|
101 |
+
),
|
102 |
),
|
|
|
|
|
103 |
)
|
104 |
|
105 |
# Process document
|