zrguo commited on
Commit
8b556d2
·
1 Parent(s): a21030f

Update README

Browse files
Files changed (2) hide show
  1. README-zh.md +92 -34
  2. README.md +85 -86
README-zh.md CHANGED
@@ -1108,40 +1108,98 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
1108
  pip install raganything
1109
  ```
1110
  2. 处理多模态文档:
1111
- ```python
1112
- import asyncio
1113
- from raganything import RAGAnything
1114
- from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1115
-
1116
- async def main():
1117
- # 使用LightRAG集成初始化RAGAnything
1118
- rag = RAGAnything(
1119
- working_dir="./rag_storage",
1120
- llm_model_func=lambda prompt, **kwargs: openai_complete_if_cache(
1121
- "gpt-4o-mini", prompt, api_key="your-api-key", **kwargs
1122
- ),
1123
- embedding_func=lambda texts: openai_embed(
1124
- texts, model="text-embedding-3-large", api_key="your-api-key"
1125
- ),
1126
- embedding_dim=3072,
1127
- )
1128
-
1129
- # 处理多模态文档
1130
- await rag.process_document_complete(
1131
- file_path="path/to/your/document.pdf",
1132
- output_dir="./output"
1133
- )
1134
-
1135
- # 查询多模态内容
1136
- result = await rag.query_with_multimodal(
1137
- "图表中显示的主要发现是什么?",
1138
- mode="hybrid"
1139
- )
1140
- print(result)
1141
-
1142
- if __name__ == "__main__":
1143
- asyncio.run(main())
1144
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1145
 
1146
  如需详细文档和高级用法,请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。
1147
 
 
1108
  pip install raganything
1109
  ```
1110
  2. 处理多模态文档:
1111
+ <details>
1112
+ <summary> <b> RAGAnything 使用示例 </b></summary>
1113
+ ```python
1114
+ import asyncio
1115
+ from raganything import RAGAnything
1116
+ from lightrag import LightRAG
1117
+ from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1118
+ from lightrag.utils import EmbeddingFunc
1119
+ import os
1120
+
1121
+ async def load_existing_lightrag():
1122
+ # 首先,创建或加载现有的 LightRAG 实例
1123
+ lightrag_working_dir = "./existing_lightrag_storage"
1124
+
1125
+ # 检查是否存在之前的 LightRAG 实例
1126
+ if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1127
+ print("✅ Found existing LightRAG instance, loading...")
1128
+ else:
1129
+ print("❌ No existing LightRAG instance found, will create new one")
1130
+
1131
+ # 使用您的配置创建/加载 LightRAG 实例
1132
+ lightrag_instance = LightRAG(
1133
+ working_dir=lightrag_working_dir,
1134
+ llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
1135
+ "gpt-4o-mini",
1136
+ prompt,
1137
+ system_prompt=system_prompt,
1138
+ history_messages=history_messages,
1139
+ api_key="your-api-key",
1140
+ **kwargs,
1141
+ ),
1142
+ embedding_func=EmbeddingFunc(
1143
+ embedding_dim=3072,
1144
+ max_token_size=8192,
1145
+ func=lambda texts: openai_embed(
1146
+ texts,
1147
+ model="text-embedding-3-large",
1148
+ api_key="your-api-key",
1149
+ base_url="your-base-url",
1150
+ ),
1151
+ )
1152
+ )
1153
+
1154
+ # 初始化存储(如果有现有数据,这将加载现有数据)
1155
+ await lightrag_instance.initialize_storages()
1156
+
1157
+ # 现在使用现有的 LightRAG 实例初始化 RAGAnything
1158
+ rag = RAGAnything(
1159
+ lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
1160
+ # 仅需要视觉模型用于多模态处理
1161
+ vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
1162
+ "gpt-4o",
1163
+ "",
1164
+ system_prompt=None,
1165
+ history_messages=[],
1166
+ messages=[
1167
+ {"role": "system", "content": system_prompt} if system_prompt else None,
1168
+ {"role": "user", "content": [
1169
+ {"type": "text", "text": prompt},
1170
+ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
1171
+ ]} if image_data else {"role": "user", "content": prompt}
1172
+ ],
1173
+ api_key="your-api-key",
1174
+ **kwargs,
1175
+ ) if image_data else openai_complete_if_cache(
1176
+ "gpt-4o-mini",
1177
+ prompt,
1178
+ system_prompt=system_prompt,
1179
+ history_messages=history_messages,
1180
+ api_key="your-api-key",
1181
+ **kwargs,
1182
+ )
1183
+ # 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
1184
+ )
1185
+
1186
+ # 查询现有的知识库
1187
+ result = await rag.query_with_multimodal(
1188
+ "What data has been processed in this LightRAG instance?",
1189
+ mode="hybrid"
1190
+ )
1191
+ print("Query result:", result)
1192
+
1193
+ # 向现有的 LightRAG 实例添加新的多模态文档
1194
+ await rag.process_document_complete(
1195
+ file_path="path/to/new/multimodal_document.pdf",
1196
+ output_dir="./output"
1197
+ )
1198
+
1199
+ if __name__ == "__main__":
1200
+ asyncio.run(load_existing_lightrag())
1201
+ ```
1202
+ </details>
1203
 
1204
  如需详细文档和高级用法,请参阅 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。
1205
 
README.md CHANGED
@@ -1159,99 +1159,98 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
1159
  pip install raganything
1160
  ```
1161
  2. Process multimodal documents:
1162
-
1163
- <details>
1164
- <summary> <b> RAGAnything Usage Example </b></summary>
1165
- ```python
1166
- import asyncio
1167
- from raganything import RAGAnything
1168
- from lightrag import LightRAG
1169
- from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1170
- from lightrag.utils import EmbeddingFunc
1171
- import os
1172
-
1173
- async def load_existing_lightrag():
1174
- # First, create or load an existing LightRAG instance
1175
- lightrag_working_dir = "./existing_lightrag_storage"
1176
-
1177
- # Check if previous LightRAG instance exists
1178
- if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1179
- print("✅ Found existing LightRAG instance, loading...")
1180
- else:
1181
- print("❌ No existing LightRAG instance found, will create new one")
1182
-
1183
- # Create/Load LightRAG instance with your configurations
1184
- lightrag_instance = LightRAG(
1185
- working_dir=lightrag_working_dir,
1186
- llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
1187
- "gpt-4o-mini",
1188
- prompt,
1189
- system_prompt=system_prompt,
1190
- history_messages=history_messages,
1191
- api_key="your-api-key",
1192
- **kwargs,
1193
- ),
1194
- embedding_func=EmbeddingFunc(
1195
- embedding_dim=3072,
1196
- max_token_size=8192,
1197
- func=lambda texts: openai_embed(
1198
- texts,
1199
- model="text-embedding-3-large",
1200
- api_key=api_key,
1201
- base_url=base_url,
1202
  ),
 
 
 
 
 
 
 
 
 
 
1203
  )
1204
- )
1205
 
1206
- # Initialize storage (this will load existing data if available)
1207
- await lightrag_instance.initialize_storages()
1208
-
1209
- # Now initialize RAGAnything with the existing LightRAG instance
1210
- rag = RAGAnything(
1211
- lightrag=lightrag_instance, # Pass the existing LightRAG instance
1212
- # Only need vision model for multimodal processing
1213
- vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
1214
- "gpt-4o",
1215
- "",
1216
- system_prompt=None,
1217
- history_messages=[],
1218
- messages=[
1219
- {"role": "system", "content": system_prompt} if system_prompt else None,
1220
- {"role": "user", "content": [
1221
- {"type": "text", "text": prompt},
1222
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
1223
- ]} if image_data else {"role": "user", "content": prompt}
1224
- ],
1225
- api_key="your-api-key",
1226
- **kwargs,
1227
- ) if image_data else openai_complete_if_cache(
1228
- "gpt-4o-mini",
1229
- prompt,
1230
- system_prompt=system_prompt,
1231
- history_messages=history_messages,
1232
- api_key="your-api-key",
1233
- **kwargs,
 
 
1234
  )
1235
- # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
1236
- )
1237
 
1238
- # Query the existing knowledge base
1239
- result = await rag.query_with_multimodal(
1240
- "What data has been processed in this LightRAG instance?",
1241
- mode="hybrid"
1242
- )
1243
- print("Query result:", result)
1244
 
1245
- # Add new multimodal documents to the existing LightRAG instance
1246
- await rag.process_document_complete(
1247
- file_path="path/to/new/multimodal_document.pdf",
1248
- output_dir="./output"
1249
- )
1250
 
1251
- if __name__ == "__main__":
1252
- asyncio.run(load_existing_lightrag())
1253
- ```
1254
- </details>
1255
 
1256
  For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
1257
 
 
1159
  pip install raganything
1160
  ```
1161
  2. Process multimodal documents:
1162
+ <details>
1163
+ <summary> <b> RAGAnything Usage Example </b></summary>
1164
+ ```python
1165
+ import asyncio
1166
+ from raganything import RAGAnything
1167
+ from lightrag import LightRAG
1168
+ from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1169
+ from lightrag.utils import EmbeddingFunc
1170
+ import os
1171
+
1172
+ async def load_existing_lightrag():
1173
+ # First, create or load an existing LightRAG instance
1174
+ lightrag_working_dir = "./existing_lightrag_storage"
1175
+
1176
+ # Check if previous LightRAG instance exists
1177
+ if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1178
+ print("✅ Found existing LightRAG instance, loading...")
1179
+ else:
1180
+ print("❌ No existing LightRAG instance found, will create new one")
1181
+
1182
+ # Create/Load LightRAG instance with your configurations
1183
+ lightrag_instance = LightRAG(
1184
+ working_dir=lightrag_working_dir,
1185
+ llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
1186
+ "gpt-4o-mini",
1187
+ prompt,
1188
+ system_prompt=system_prompt,
1189
+ history_messages=history_messages,
1190
+ api_key="your-api-key",
1191
+ **kwargs,
 
 
 
 
 
 
 
 
 
 
1192
  ),
1193
+ embedding_func=EmbeddingFunc(
1194
+ embedding_dim=3072,
1195
+ max_token_size=8192,
1196
+ func=lambda texts: openai_embed(
1197
+ texts,
1198
+ model="text-embedding-3-large",
1199
+ api_key=api_key,
1200
+ base_url=base_url,
1201
+ ),
1202
+ )
1203
  )
 
1204
 
1205
+ # Initialize storage (this will load existing data if available)
1206
+ await lightrag_instance.initialize_storages()
1207
+
1208
+ # Now initialize RAGAnything with the existing LightRAG instance
1209
+ rag = RAGAnything(
1210
+ lightrag=lightrag_instance, # Pass the existing LightRAG instance
1211
+ # Only need vision model for multimodal processing
1212
+ vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
1213
+ "gpt-4o",
1214
+ "",
1215
+ system_prompt=None,
1216
+ history_messages=[],
1217
+ messages=[
1218
+ {"role": "system", "content": system_prompt} if system_prompt else None,
1219
+ {"role": "user", "content": [
1220
+ {"type": "text", "text": prompt},
1221
+ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
1222
+ ]} if image_data else {"role": "user", "content": prompt}
1223
+ ],
1224
+ api_key="your-api-key",
1225
+ **kwargs,
1226
+ ) if image_data else openai_complete_if_cache(
1227
+ "gpt-4o-mini",
1228
+ prompt,
1229
+ system_prompt=system_prompt,
1230
+ history_messages=history_messages,
1231
+ api_key="your-api-key",
1232
+ **kwargs,
1233
+ )
1234
+ # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
1235
  )
 
 
1236
 
1237
+ # Query the existing knowledge base
1238
+ result = await rag.query_with_multimodal(
1239
+ "What data has been processed in this LightRAG instance?",
1240
+ mode="hybrid"
1241
+ )
1242
+ print("Query result:", result)
1243
 
1244
+ # Add new multimodal documents to the existing LightRAG instance
1245
+ await rag.process_document_complete(
1246
+ file_path="path/to/new/multimodal_document.pdf",
1247
+ output_dir="./output"
1248
+ )
1249
 
1250
+ if __name__ == "__main__":
1251
+ asyncio.run(load_existing_lightrag())
1252
+ ```
1253
+ </details>
1254
 
1255
  For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
1256