zrguo
committed on
Commit
·
8b556d2
1
Parent(s):
a21030f
Update README
Browse files
- README-zh.md +92 -34
- README.md +85 -86
README-zh.md
CHANGED
@@ -1108,40 +1108,98 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
|
|
1108 |
pip install raganything
|
1109 |
```
|
1110 |
2. 处理多模态文档:
|
1111 |
-
|
1112 |
-
|
1113 |
-
|
1114 |
-
|
1115 |
-
|
1116 |
-
|
1117 |
-
|
1118 |
-
|
1119 |
-
|
1120 |
-
|
1121 |
-
|
1122 |
-
|
1123 |
-
|
1124 |
-
|
1125 |
-
|
1126 |
-
|
1127 |
-
|
1128 |
-
|
1129 |
-
|
1130 |
-
|
1131 |
-
|
1132 |
-
|
1133 |
-
|
1134 |
-
|
1135 |
-
|
1136 |
-
|
1137 |
-
|
1138 |
-
|
1139 |
-
|
1140 |
-
|
1141 |
-
|
1142 |
-
|
1143 |
-
|
1144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1145 |
|
1146 |
如需详细文档和高级用法,请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。
|
1147 |
|
|
|
1108 |
pip install raganything
|
1109 |
```
|
1110 |
2. 处理多模态文档:
|
1111 |
+
<details>
|
1112 |
+
<summary> <b> RAGAnything 使用实例 </b></summary>
|
1113 |
+
```python
|
1114 |
+
import asyncio
|
1115 |
+
from raganything import RAGAnything
|
1116 |
+
from lightrag import LightRAG
|
1117 |
+
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
1118 |
+
from lightrag.utils import EmbeddingFunc
|
1119 |
+
import os
|
1120 |
+
|
1121 |
+
async def load_existing_lightrag():
|
1122 |
+
# 首先,创建或加载现有的 LightRAG 实例
|
1123 |
+
lightrag_working_dir = "./existing_lightrag_storage"
|
1124 |
+
|
1125 |
+
# 检查是否存在之前的 LightRAG 实例
|
1126 |
+
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
1127 |
+
print("✅ Found existing LightRAG instance, loading...")
|
1128 |
+
else:
|
1129 |
+
print("❌ No existing LightRAG instance found, will create new one")
|
1130 |
+
|
1131 |
+
# 使用您的配置创建/加载 LightRAG 实例
|
1132 |
+
lightrag_instance = LightRAG(
|
1133 |
+
working_dir=lightrag_working_dir,
|
1134 |
+
llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
|
1135 |
+
"gpt-4o-mini",
|
1136 |
+
prompt,
|
1137 |
+
system_prompt=system_prompt,
|
1138 |
+
history_messages=history_messages,
|
1139 |
+
api_key="your-api-key",
|
1140 |
+
**kwargs,
|
1141 |
+
),
|
1142 |
+
embedding_func=EmbeddingFunc(
|
1143 |
+
embedding_dim=3072,
|
1144 |
+
max_token_size=8192,
|
1145 |
+
func=lambda texts: openai_embed(
|
1146 |
+
texts,
|
1147 |
+
model="text-embedding-3-large",
|
1148 |
+
api_key=api_key,
|
1149 |
+
base_url=base_url,
|
1150 |
+
),
|
1151 |
+
)
|
1152 |
+
)
|
1153 |
+
|
1154 |
+
# 初始化存储(如果有现有数据,这将加载现有数据)
|
1155 |
+
await lightrag_instance.initialize_storages()
|
1156 |
+
|
1157 |
+
# 现在使用现有的 LightRAG 实例初始化 RAGAnything
|
1158 |
+
rag = RAGAnything(
|
1159 |
+
lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
|
1160 |
+
# 仅需要视觉模型用于多模态处理
|
1161 |
+
vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
|
1162 |
+
"gpt-4o",
|
1163 |
+
"",
|
1164 |
+
system_prompt=None,
|
1165 |
+
history_messages=[],
|
1166 |
+
messages=[
|
1167 |
+
{"role": "system", "content": system_prompt} if system_prompt else None,
|
1168 |
+
{"role": "user", "content": [
|
1169 |
+
{"type": "text", "text": prompt},
|
1170 |
+
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
|
1171 |
+
]} if image_data else {"role": "user", "content": prompt}
|
1172 |
+
],
|
1173 |
+
api_key="your-api-key",
|
1174 |
+
**kwargs,
|
1175 |
+
) if image_data else openai_complete_if_cache(
|
1176 |
+
"gpt-4o-mini",
|
1177 |
+
prompt,
|
1178 |
+
system_prompt=system_prompt,
|
1179 |
+
history_messages=history_messages,
|
1180 |
+
api_key="your-api-key",
|
1181 |
+
**kwargs,
|
1182 |
+
)
|
1183 |
+
# 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
|
1184 |
+
)
|
1185 |
+
|
1186 |
+
# 查询现有的知识库
|
1187 |
+
result = await rag.query_with_multimodal(
|
1188 |
+
"What data has been processed in this LightRAG instance?",
|
1189 |
+
mode="hybrid"
|
1190 |
+
)
|
1191 |
+
print("Query result:", result)
|
1192 |
+
|
1193 |
+
# 向现有的 LightRAG 实例添加新的多模态文档
|
1194 |
+
await rag.process_document_complete(
|
1195 |
+
file_path="path/to/new/multimodal_document.pdf",
|
1196 |
+
output_dir="./output"
|
1197 |
+
)
|
1198 |
+
|
1199 |
+
if __name__ == "__main__":
|
1200 |
+
asyncio.run(load_existing_lightrag())
|
1201 |
+
```
|
1202 |
+
</details>
|
1203 |
|
1204 |
如需详细文档和高级用法,请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。
|
1205 |
|
README.md
CHANGED
@@ -1159,99 +1159,98 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
|
1159 |
pip install raganything
|
1160 |
```
|
1161 |
2. Process multimodal documents:
|
1162 |
-
|
1163 |
-
<
|
1164 |
-
|
1165 |
-
|
1166 |
-
|
1167 |
-
|
1168 |
-
|
1169 |
-
|
1170 |
-
|
1171 |
-
|
1172 |
-
|
1173 |
-
|
1174 |
-
|
1175 |
-
|
1176 |
-
|
1177 |
-
|
1178 |
-
|
1179 |
-
|
1180 |
-
|
1181 |
-
|
1182 |
-
|
1183 |
-
|
1184 |
-
|
1185 |
-
|
1186 |
-
|
1187 |
-
|
1188 |
-
|
1189 |
-
|
1190 |
-
|
1191 |
-
|
1192 |
-
**kwargs,
|
1193 |
-
),
|
1194 |
-
embedding_func=EmbeddingFunc(
|
1195 |
-
embedding_dim=3072,
|
1196 |
-
max_token_size=8192,
|
1197 |
-
func=lambda texts: openai_embed(
|
1198 |
-
texts,
|
1199 |
-
model="text-embedding-3-large",
|
1200 |
-
api_key=api_key,
|
1201 |
-
base_url=base_url,
|
1202 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1203 |
)
|
1204 |
-
)
|
1205 |
|
1206 |
-
|
1207 |
-
|
1208 |
-
|
1209 |
-
|
1210 |
-
|
1211 |
-
|
1212 |
-
|
1213 |
-
|
1214 |
-
|
1215 |
-
|
1216 |
-
|
1217 |
-
|
1218 |
-
|
1219 |
-
|
1220 |
-
|
1221 |
-
|
1222 |
-
|
1223 |
-
|
1224 |
-
|
1225 |
-
|
1226 |
-
|
1227 |
-
|
1228 |
-
|
1229 |
-
|
1230 |
-
|
1231 |
-
|
1232 |
-
|
1233 |
-
|
|
|
|
|
1234 |
)
|
1235 |
-
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
|
1236 |
-
)
|
1237 |
|
1238 |
-
|
1239 |
-
|
1240 |
-
|
1241 |
-
|
1242 |
-
|
1243 |
-
|
1244 |
|
1245 |
-
|
1246 |
-
|
1247 |
-
|
1248 |
-
|
1249 |
-
|
1250 |
|
1251 |
-
|
1252 |
-
|
1253 |
-
|
1254 |
-
</details>
|
1255 |
|
1256 |
For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
|
1257 |
|
|
|
1159 |
pip install raganything
|
1160 |
```
|
1161 |
2. Process multimodal documents:
|
1162 |
+
<details>
|
1163 |
+
<summary> <b> RAGAnything Usage Example </b></summary>
|
1164 |
+
```python
|
1165 |
+
import asyncio
|
1166 |
+
from raganything import RAGAnything
|
1167 |
+
from lightrag import LightRAG
|
1168 |
+
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
1169 |
+
from lightrag.utils import EmbeddingFunc
|
1170 |
+
import os
|
1171 |
+
|
1172 |
+
async def load_existing_lightrag():
|
1173 |
+
# First, create or load an existing LightRAG instance
|
1174 |
+
lightrag_working_dir = "./existing_lightrag_storage"
|
1175 |
+
|
1176 |
+
# Check if previous LightRAG instance exists
|
1177 |
+
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
1178 |
+
print("✅ Found existing LightRAG instance, loading...")
|
1179 |
+
else:
|
1180 |
+
print("❌ No existing LightRAG instance found, will create new one")
|
1181 |
+
|
1182 |
+
# Create/Load LightRAG instance with your configurations
|
1183 |
+
lightrag_instance = LightRAG(
|
1184 |
+
working_dir=lightrag_working_dir,
|
1185 |
+
llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
|
1186 |
+
"gpt-4o-mini",
|
1187 |
+
prompt,
|
1188 |
+
system_prompt=system_prompt,
|
1189 |
+
history_messages=history_messages,
|
1190 |
+
api_key="your-api-key",
|
1191 |
+
**kwargs,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1192 |
),
|
1193 |
+
embedding_func=EmbeddingFunc(
|
1194 |
+
embedding_dim=3072,
|
1195 |
+
max_token_size=8192,
|
1196 |
+
func=lambda texts: openai_embed(
|
1197 |
+
texts,
|
1198 |
+
model="text-embedding-3-large",
|
1199 |
+
api_key=api_key,
|
1200 |
+
base_url=base_url,
|
1201 |
+
),
|
1202 |
+
)
|
1203 |
)
|
|
|
1204 |
|
1205 |
+
# Initialize storage (this will load existing data if available)
|
1206 |
+
await lightrag_instance.initialize_storages()
|
1207 |
+
|
1208 |
+
# Now initialize RAGAnything with the existing LightRAG instance
|
1209 |
+
rag = RAGAnything(
|
1210 |
+
lightrag=lightrag_instance, # Pass the existing LightRAG instance
|
1211 |
+
# Only need vision model for multimodal processing
|
1212 |
+
vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
|
1213 |
+
"gpt-4o",
|
1214 |
+
"",
|
1215 |
+
system_prompt=None,
|
1216 |
+
history_messages=[],
|
1217 |
+
messages=[
|
1218 |
+
{"role": "system", "content": system_prompt} if system_prompt else None,
|
1219 |
+
{"role": "user", "content": [
|
1220 |
+
{"type": "text", "text": prompt},
|
1221 |
+
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
|
1222 |
+
]} if image_data else {"role": "user", "content": prompt}
|
1223 |
+
],
|
1224 |
+
api_key="your-api-key",
|
1225 |
+
**kwargs,
|
1226 |
+
) if image_data else openai_complete_if_cache(
|
1227 |
+
"gpt-4o-mini",
|
1228 |
+
prompt,
|
1229 |
+
system_prompt=system_prompt,
|
1230 |
+
history_messages=history_messages,
|
1231 |
+
api_key="your-api-key",
|
1232 |
+
**kwargs,
|
1233 |
+
)
|
1234 |
+
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
|
1235 |
)
|
|
|
|
|
1236 |
|
1237 |
+
# Query the existing knowledge base
|
1238 |
+
result = await rag.query_with_multimodal(
|
1239 |
+
"What data has been processed in this LightRAG instance?",
|
1240 |
+
mode="hybrid"
|
1241 |
+
)
|
1242 |
+
print("Query result:", result)
|
1243 |
|
1244 |
+
# Add new multimodal documents to the existing LightRAG instance
|
1245 |
+
await rag.process_document_complete(
|
1246 |
+
file_path="path/to/new/multimodal_document.pdf",
|
1247 |
+
output_dir="./output"
|
1248 |
+
)
|
1249 |
|
1250 |
+
if __name__ == "__main__":
|
1251 |
+
asyncio.run(load_existing_lightrag())
|
1252 |
+
```
|
1253 |
+
</details>
|
1254 |
|
1255 |
For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
|
1256 |
|