yangdx commited on
Commit
77d8960
·
1 Parent(s): 3666598

Update sample code for OpenAI and OpenAI compatible

Browse files
examples/lightrag_openai_compatible_demo.py CHANGED
@@ -1,13 +1,83 @@
1
  import os
2
  import asyncio
 
 
 
3
  from lightrag import LightRAG, QueryParam
4
- from lightrag.llm.openai import openai_complete_if_cache, openai_embed
5
- from lightrag.utils import EmbeddingFunc
 
6
  import numpy as np
7
  from lightrag.kg.shared_storage import initialize_pipeline_status
8
 
9
  WORKING_DIR = "./dickens"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  if not os.path.exists(WORKING_DIR):
12
  os.mkdir(WORKING_DIR)
13
 
@@ -16,22 +86,21 @@ async def llm_model_func(
16
  prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
17
  ) -> str:
18
  return await openai_complete_if_cache(
19
- "solar-mini",
20
  prompt,
21
  system_prompt=system_prompt,
22
  history_messages=history_messages,
23
- api_key=os.getenv("UPSTAGE_API_KEY"),
24
- base_url="https://api.upstage.ai/v1/solar",
25
  **kwargs,
26
  )
27
 
28
 
29
  async def embedding_func(texts: list[str]) -> np.ndarray:
30
- return await openai_embed(
31
- texts,
32
- model="solar-embedding-1-large-query",
33
- api_key=os.getenv("UPSTAGE_API_KEY"),
34
- base_url="https://api.upstage.ai/v1/solar",
35
  )
36
 
37
 
@@ -54,6 +123,12 @@ async def test_funcs():
54
  # asyncio.run(test_funcs())
55
 
56
 
 
 
 
 
 
 
57
  async def initialize_rag():
58
  embedding_dimension = await get_embedding_dim()
59
  print(f"Detected embedding dimension: {embedding_dimension}")
@@ -83,37 +158,66 @@ async def main():
83
  await rag.ainsert(f.read())
84
 
85
  # Perform naive search
86
- print(
87
- await rag.aquery(
88
- "What are the top themes in this story?", param=QueryParam(mode="naive")
89
- )
 
 
90
  )
 
 
 
 
91
 
92
  # Perform local search
93
- print(
94
- await rag.aquery(
95
- "What are the top themes in this story?", param=QueryParam(mode="local")
96
- )
 
 
97
  )
 
 
 
 
98
 
99
  # Perform global search
100
- print(
101
- await rag.aquery(
102
- "What are the top themes in this story?",
103
- param=QueryParam(mode="global"),
104
- )
 
105
  )
 
 
 
 
106
 
107
  # Perform hybrid search
108
- print(
109
- await rag.aquery(
110
- "What are the top themes in this story?",
111
- param=QueryParam(mode="hybrid"),
112
- )
 
113
  )
 
 
 
 
 
114
  except Exception as e:
115
  print(f"An error occurred: {e}")
 
 
 
116
 
117
 
118
  if __name__ == "__main__":
 
 
119
  asyncio.run(main())
 
 
1
  import os
2
  import asyncio
3
+ import inspect
4
+ import logging
5
+ import logging.config
6
  from lightrag import LightRAG, QueryParam
7
+ from lightrag.llm.openai import openai_complete_if_cache
8
+ from lightrag.llm.ollama import ollama_embed
9
+ from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug
10
  import numpy as np
11
  from lightrag.kg.shared_storage import initialize_pipeline_status
12
 
13
  WORKING_DIR = "./dickens"
14
 
15
+
16
+ def configure_logging():
17
+ """Configure logging for the application"""
18
+
19
+ # Reset any existing handlers to ensure clean configuration
20
+ for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
21
+ logger_instance = logging.getLogger(logger_name)
22
+ logger_instance.handlers = []
23
+ logger_instance.filters = []
24
+
25
+ # Get log directory path from environment variable or use current directory
26
+ log_dir = os.getenv("LOG_DIR", os.getcwd())
27
+ log_file_path = os.path.abspath(
28
+ os.path.join(log_dir, "lightrag_compatible_demo.log")
29
+ )
30
+
31
+ print(f"\nLightRAG compatible demo log file: {log_file_path}\n")
32
+ os.makedirs(os.path.dirname(log_dir), exist_ok=True)
33
+
34
+ # Get log file max size and backup count from environment variables
35
+ log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
36
+ log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
37
+
38
+ logging.config.dictConfig(
39
+ {
40
+ "version": 1,
41
+ "disable_existing_loggers": False,
42
+ "formatters": {
43
+ "default": {
44
+ "format": "%(levelname)s: %(message)s",
45
+ },
46
+ "detailed": {
47
+ "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
48
+ },
49
+ },
50
+ "handlers": {
51
+ "console": {
52
+ "formatter": "default",
53
+ "class": "logging.StreamHandler",
54
+ "stream": "ext://sys.stderr",
55
+ },
56
+ "file": {
57
+ "formatter": "detailed",
58
+ "class": "logging.handlers.RotatingFileHandler",
59
+ "filename": log_file_path,
60
+ "maxBytes": log_max_bytes,
61
+ "backupCount": log_backup_count,
62
+ "encoding": "utf-8",
63
+ },
64
+ },
65
+ "loggers": {
66
+ "lightrag": {
67
+ "handlers": ["console", "file"],
68
+ "level": "INFO",
69
+ "propagate": False,
70
+ },
71
+ },
72
+ }
73
+ )
74
+
75
+ # Set the logger level to INFO
76
+ logger.setLevel(logging.INFO)
77
+ # Enable verbose debug if needed
78
+ set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
79
+
80
+
81
  if not os.path.exists(WORKING_DIR):
82
  os.mkdir(WORKING_DIR)
83
 
 
86
  prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
87
  ) -> str:
88
  return await openai_complete_if_cache(
89
+ "deepseek-chat",
90
  prompt,
91
  system_prompt=system_prompt,
92
  history_messages=history_messages,
93
+ api_key=os.getenv("OPENAI_API_KEY"),
94
+ base_url="https://api.deepseek.com",
95
  **kwargs,
96
  )
97
 
98
 
99
  async def embedding_func(texts: list[str]) -> np.ndarray:
100
+ return await ollama_embed(
101
+ texts=texts,
102
+ embed_model="bge-m3:latest",
103
+ host="http://m4.lan.znipower.com:11434",
 
104
  )
105
 
106
 
 
123
  # asyncio.run(test_funcs())
124
 
125
 
126
+ async def print_stream(stream):
127
+ async for chunk in stream:
128
+ if chunk:
129
+ print(chunk, end="", flush=True)
130
+
131
+
132
  async def initialize_rag():
133
  embedding_dimension = await get_embedding_dim()
134
  print(f"Detected embedding dimension: {embedding_dimension}")
 
158
  await rag.ainsert(f.read())
159
 
160
  # Perform naive search
161
+ print("\n=====================")
162
+ print("Query mode: naive")
163
+ print("=====================")
164
+ resp = await rag.aquery(
165
+ "What are the top themes in this story?",
166
+ param=QueryParam(mode="naive", stream=True),
167
  )
168
+ if inspect.isasyncgen(resp):
169
+ await print_stream(resp)
170
+ else:
171
+ print(resp)
172
 
173
  # Perform local search
174
+ print("\n=====================")
175
+ print("Query mode: local")
176
+ print("=====================")
177
+ resp = await rag.aquery(
178
+ "What are the top themes in this story?",
179
+ param=QueryParam(mode="local", stream=True),
180
  )
181
+ if inspect.isasyncgen(resp):
182
+ await print_stream(resp)
183
+ else:
184
+ print(resp)
185
 
186
  # Perform global search
187
+ print("\n=====================")
188
+ print("Query mode: global")
189
+ print("=====================")
190
+ resp = await rag.aquery(
191
+ "What are the top themes in this story?",
192
+ param=QueryParam(mode="global", stream=True),
193
  )
194
+ if inspect.isasyncgen(resp):
195
+ await print_stream(resp)
196
+ else:
197
+ print(resp)
198
 
199
  # Perform hybrid search
200
+ print("\n=====================")
201
+ print("Query mode: hybrid")
202
+ print("=====================")
203
+ resp = await rag.aquery(
204
+ "What are the top themes in this story?",
205
+ param=QueryParam(mode="hybrid", stream=True),
206
  )
207
+ if inspect.isasyncgen(resp):
208
+ await print_stream(resp)
209
+ else:
210
+ print(resp)
211
+
212
  except Exception as e:
213
  print(f"An error occurred: {e}")
214
+ finally:
215
+ if rag:
216
+ await rag.finalize_storages()
217
 
218
 
219
  if __name__ == "__main__":
220
+ # Configure logging before running the main function
221
+ configure_logging()
222
  asyncio.run(main())
223
+ print("\nDone!")
examples/lightrag_openai_compatible_stream_demo.py DELETED
@@ -1,72 +0,0 @@
1
- import inspect
2
- import os
3
- import asyncio
4
- from lightrag import LightRAG
5
- from lightrag.llm import openai_complete, openai_embed
6
- from lightrag.utils import EmbeddingFunc, always_get_an_event_loop
7
- from lightrag import QueryParam
8
- from lightrag.kg.shared_storage import initialize_pipeline_status
9
-
10
- # WorkingDir
11
- ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
12
- WORKING_DIR = os.path.join(ROOT_DIR, "dickens")
13
- if not os.path.exists(WORKING_DIR):
14
- os.mkdir(WORKING_DIR)
15
- print(f"WorkingDir: {WORKING_DIR}")
16
-
17
- api_key = "empty"
18
-
19
-
20
- async def initialize_rag():
21
- rag = LightRAG(
22
- working_dir=WORKING_DIR,
23
- llm_model_func=openai_complete,
24
- llm_model_name="qwen2.5-14b-instruct@4bit",
25
- llm_model_max_async=4,
26
- llm_model_max_token_size=32768,
27
- llm_model_kwargs={"base_url": "http://127.0.0.1:1234/v1", "api_key": api_key},
28
- embedding_func=EmbeddingFunc(
29
- embedding_dim=1024,
30
- max_token_size=8192,
31
- func=lambda texts: openai_embed(
32
- texts=texts,
33
- model="text-embedding-bge-m3",
34
- base_url="http://127.0.0.1:1234/v1",
35
- api_key=api_key,
36
- ),
37
- ),
38
- )
39
-
40
- await rag.initialize_storages()
41
- await initialize_pipeline_status()
42
-
43
- return rag
44
-
45
-
46
- async def print_stream(stream):
47
- async for chunk in stream:
48
- if chunk:
49
- print(chunk, end="", flush=True)
50
-
51
-
52
- def main():
53
- # Initialize RAG instance
54
- rag = asyncio.run(initialize_rag())
55
-
56
- with open("./book.txt", "r", encoding="utf-8") as f:
57
- rag.insert(f.read())
58
-
59
- resp = rag.query(
60
- "What are the top themes in this story?",
61
- param=QueryParam(mode="hybrid", stream=True),
62
- )
63
-
64
- loop = always_get_an_event_loop()
65
- if inspect.isasyncgen(resp):
66
- loop.run_until_complete(print_stream(resp))
67
- else:
68
- print(resp)
69
-
70
-
71
- if __name__ == "__main__":
72
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/lightrag_openai_demo.py CHANGED
@@ -9,9 +9,10 @@ from lightrag.utils import logger, set_verbose_debug
9
 
10
  WORKING_DIR = "./dickens"
11
 
 
12
  def configure_logging():
13
  """Configure logging for the application"""
14
-
15
  # Reset any existing handlers to ensure clean configuration
16
  for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
17
  logger_instance = logging.getLogger(logger_name)
@@ -65,12 +66,13 @@ def configure_logging():
65
  },
66
  }
67
  )
68
-
69
  # Set the logger level to INFO
70
  logger.setLevel(logging.INFO)
71
  # Enable verbose debug if needed
72
  set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
73
 
 
74
  if not os.path.exists(WORKING_DIR):
75
  os.mkdir(WORKING_DIR)
76
 
@@ -97,6 +99,9 @@ async def main():
97
  await rag.ainsert(f.read())
98
 
99
  # Perform naive search
 
 
 
100
  print(
101
  await rag.aquery(
102
  "What are the top themes in this story?", param=QueryParam(mode="naive")
@@ -104,6 +109,9 @@ async def main():
104
  )
105
 
106
  # Perform local search
 
 
 
107
  print(
108
  await rag.aquery(
109
  "What are the top themes in this story?", param=QueryParam(mode="local")
@@ -111,6 +119,9 @@ async def main():
111
  )
112
 
113
  # Perform global search
 
 
 
114
  print(
115
  await rag.aquery(
116
  "What are the top themes in this story?", param=QueryParam(mode="global")
@@ -118,6 +129,9 @@ async def main():
118
  )
119
 
120
  # Perform hybrid search
 
 
 
121
  print(
122
  await rag.aquery(
123
  "What are the top themes in this story?", param=QueryParam(mode="hybrid")
 
9
 
10
  WORKING_DIR = "./dickens"
11
 
12
+
13
  def configure_logging():
14
  """Configure logging for the application"""
15
+
16
  # Reset any existing handlers to ensure clean configuration
17
  for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
18
  logger_instance = logging.getLogger(logger_name)
 
66
  },
67
  }
68
  )
69
+
70
  # Set the logger level to INFO
71
  logger.setLevel(logging.INFO)
72
  # Enable verbose debug if needed
73
  set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
74
 
75
+
76
  if not os.path.exists(WORKING_DIR):
77
  os.mkdir(WORKING_DIR)
78
 
 
99
  await rag.ainsert(f.read())
100
 
101
  # Perform naive search
102
+ print("\n=====================")
103
+ print("Query mode: naive")
104
+ print("=====================")
105
  print(
106
  await rag.aquery(
107
  "What are the top themes in this story?", param=QueryParam(mode="naive")
 
109
  )
110
 
111
  # Perform local search
112
+ print("\n=====================")
113
+ print("Query mode: local")
114
+ print("=====================")
115
  print(
116
  await rag.aquery(
117
  "What are the top themes in this story?", param=QueryParam(mode="local")
 
119
  )
120
 
121
  # Perform global search
122
+ print("\n=====================")
123
+ print("Query mode: global")
124
+ print("=====================")
125
  print(
126
  await rag.aquery(
127
  "What are the top themes in this story?", param=QueryParam(mode="global")
 
129
  )
130
 
131
  # Perform hybrid search
132
+ print("\n=====================")
133
+ print("Query mode: hybrid")
134
+ print("=====================")
135
  print(
136
  await rag.aquery(
137
  "What are the top themes in this story?", param=QueryParam(mode="hybrid")