Pankaj Kaushal committed
Commit fbf52be · 1 Parent(s): d7e7eb6

Add LlamaIndex Wrapper and Example Implementations


- Updated README.md with new Wrappers section detailing LlamaIndex integration
- Added LlamaIndex wrapper implementation in `lightrag/wrapper/llama_index_impl.py`
- Created two example scripts demonstrating LlamaIndex usage:
  - Direct OpenAI integration
  - LiteLLM proxy integration
- Added wrapper documentation in `lightrag/wrapper/Readme.md`
- Included comprehensive usage examples and configuration details

README.md CHANGED
@@ -312,7 +312,45 @@ rag = LightRAG(
  In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.

  </details>
+ <details>
+ <summary> <b>Wrappers</b> </summary>
+
+ LightRAG supports integration with various frameworks and model providers through wrappers. These wrappers provide a consistent interface while abstracting away the specifics of each framework.
+
+ ### Current Wrappers
+
+ 1. **LlamaIndex** (`wrapper/llama_index_impl.py`):
+    - Integrates with OpenAI and other providers through LlamaIndex
+    - Supports both direct API access and proxy services like LiteLLM
+    - Provides consistent interfaces for embeddings and completions
+    - See [LlamaIndex Wrapper Documentation](lightrag/wrapper/Readme.md) for detailed setup and examples
+
+ ### Example Usage
+
+ ```python
+ # Using LlamaIndex with direct OpenAI access
+ from lightrag import LightRAG
+ from lightrag.utils import EmbeddingFunc
+ from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
+ from llama_index.embeddings.openai import OpenAIEmbedding
+ from llama_index.llms.openai import OpenAI
+
+ rag = LightRAG(
+     working_dir="your/path",
+     llm_model_func=llm_model_func,  # LlamaIndex-compatible completion function
+     embedding_func=EmbeddingFunc(   # LlamaIndex-compatible embedding function
+         embedding_dim=1536,
+         max_token_size=8192,
+         func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
+     ),
+ )
+ ```
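The snippet above assumes `llm_model_func` and `embed_model` are already defined. A minimal sketch of those definitions, condensed from the wrapper documentation and example scripts added in this commit (the model names and API key are placeholders):

```python
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache

# Placeholder model/key values; swap in your own configuration
embed_model = OpenAIEmbedding(model="text-embedding-3-large", api_key="your-openai-key")

async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
    # Create the LlamaIndex LLM once and pass it along via kwargs
    if "llm_instance" not in kwargs:
        kwargs["llm_instance"] = OpenAI(model="gpt-4", api_key="your-openai-key", temperature=0.7)
    return await llama_index_complete_if_cache(
        kwargs["llm_instance"],
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )
```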
 
+ #### For detailed documentation and examples, see:
+ - [LlamaIndex Wrapper Documentation](lightrag/wrapper/Readme.md)
+ - [Direct OpenAI Example](examples/lightrag_api_llamaindex_direct_demo_simplified.py)
+ - [LiteLLM Proxy Example](examples/lightrag_api_llamaindex_litellm_demo_simplified.py)
+
+ </details>
  <details>
  <summary> <b>Conversation History Support</b> </summary>
examples/lightrag_api_llamaindex_direct_demo_simplified.py ADDED
@@ -0,0 +1,98 @@
+ import os
+ from lightrag import LightRAG, QueryParam
+ from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
+ from lightrag.utils import EmbeddingFunc
+ from llama_index.llms.openai import OpenAI
+ from llama_index.embeddings.openai import OpenAIEmbedding
+ import asyncio
+
+ # Configure working directory
+ DEFAULT_RAG_DIR = "index_default"
+ WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}")
+ print(f"WORKING_DIR: {WORKING_DIR}")
+
+ # Model configuration
+ LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4")
+ print(f"LLM_MODEL: {LLM_MODEL}")
+ EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small")
+ print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
+ EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
+ print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
+
+ # OpenAI configuration
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-api-key-here")
+
+ if not os.path.exists(WORKING_DIR):
+     os.mkdir(WORKING_DIR)
+
+ # Initialize LLM function
+ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+     try:
+         # Initialize OpenAI if not in kwargs
+         if "llm_instance" not in kwargs:
+             llm_instance = OpenAI(
+                 model=LLM_MODEL,
+                 api_key=OPENAI_API_KEY,
+                 temperature=0.7,
+             )
+             kwargs["llm_instance"] = llm_instance
+
+         response = await llama_index_complete_if_cache(
+             kwargs["llm_instance"],
+             prompt,
+             system_prompt=system_prompt,
+             history_messages=history_messages,
+             **kwargs,
+         )
+         return response
+     except Exception as e:
+         print(f"LLM request failed: {str(e)}")
+         raise
+
+ # Initialize embedding function
+ async def embedding_func(texts):
+     try:
+         embed_model = OpenAIEmbedding(
+             model=EMBEDDING_MODEL,
+             api_key=OPENAI_API_KEY,
+         )
+         return await llama_index_embed(texts, embed_model=embed_model)
+     except Exception as e:
+         print(f"Embedding failed: {str(e)}")
+         raise
+
+ # Get embedding dimension
+ async def get_embedding_dim():
+     test_text = ["This is a test sentence."]
+     embedding = await embedding_func(test_text)
+     embedding_dim = embedding.shape[1]
+     print(f"embedding_dim={embedding_dim}")
+     return embedding_dim
+
+ # Initialize RAG instance
+ rag = LightRAG(
+     working_dir=WORKING_DIR,
+     llm_model_func=llm_model_func,
+     embedding_func=EmbeddingFunc(
+         embedding_dim=asyncio.run(get_embedding_dim()),
+         max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
+         func=embedding_func,
+     ),
+ )
+
+ # Insert example text
+ with open("./book.txt", "r", encoding="utf-8") as f:
+     rag.insert(f.read())
+
+ # Test different query modes
+ print("\nNaive Search:")
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+ print("\nLocal Search:")
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+ print("\nGlobal Search:")
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+ print("\nHybrid Search:")
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
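To run this demo as added, something along these lines should work (values shown are the script's own defaults or placeholders; the script also expects a `book.txt` in the current directory):

```bash
export OPENAI_API_KEY=your-api-key-here      # placeholder, required
export LLM_MODEL=gpt-4                       # optional override
export EMBEDDING_MODEL=text-embedding-3-small
python examples/lightrag_api_llamaindex_direct_demo_simplified.py
```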
examples/lightrag_api_llamaindex_litellm_demo_simplified.py ADDED
@@ -0,0 +1,102 @@
+ import os
+ from lightrag import LightRAG, QueryParam
+ from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
+ from lightrag.utils import EmbeddingFunc
+ from llama_index.llms.litellm import LiteLLM
+ from llama_index.embeddings.litellm import LiteLLMEmbedding
+ import asyncio
+
+ # Configure working directory
+ DEFAULT_RAG_DIR = "index_default"
+ WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}")
+ print(f"WORKING_DIR: {WORKING_DIR}")
+
+ # Model configuration
+ LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o")
+ print(f"LLM_MODEL: {LLM_MODEL}")
+ EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "embedding-model")
+ print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
+ EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
+ print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
+
+ # LiteLLM configuration
+ LITELLM_URL = os.environ.get("LITELLM_URL", "http://localhost:4000")
+ print(f"LITELLM_URL: {LITELLM_URL}")
+ LITELLM_KEY = os.environ.get("LITELLM_KEY", "sk-1234")
+
+ if not os.path.exists(WORKING_DIR):
+     os.mkdir(WORKING_DIR)
+
+ # Initialize LLM function
+ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+     try:
+         # Initialize LiteLLM if not in kwargs
+         if "llm_instance" not in kwargs:
+             llm_instance = LiteLLM(
+                 model=f"openai/{LLM_MODEL}",  # Format: "provider/model_name"
+                 api_base=LITELLM_URL,
+                 api_key=LITELLM_KEY,
+                 temperature=0.7,
+             )
+             kwargs["llm_instance"] = llm_instance
+
+         response = await llama_index_complete_if_cache(
+             kwargs["llm_instance"],
+             prompt,
+             system_prompt=system_prompt,
+             history_messages=history_messages,
+             **kwargs,
+         )
+         return response
+     except Exception as e:
+         print(f"LLM request failed: {str(e)}")
+         raise
+
+ # Initialize embedding function
+ async def embedding_func(texts):
+     try:
+         embed_model = LiteLLMEmbedding(
+             model_name=f"openai/{EMBEDDING_MODEL}",
+             api_base=LITELLM_URL,
+             api_key=LITELLM_KEY,
+         )
+         return await llama_index_embed(texts, embed_model=embed_model)
+     except Exception as e:
+         print(f"Embedding failed: {str(e)}")
+         raise
+
+ # Get embedding dimension
+ async def get_embedding_dim():
+     test_text = ["This is a test sentence."]
+     embedding = await embedding_func(test_text)
+     embedding_dim = embedding.shape[1]
+     print(f"embedding_dim={embedding_dim}")
+     return embedding_dim
+
+ # Initialize RAG instance
+ rag = LightRAG(
+     working_dir=WORKING_DIR,
+     llm_model_func=llm_model_func,
+     embedding_func=EmbeddingFunc(
+         embedding_dim=asyncio.run(get_embedding_dim()),
+         max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
+         func=embedding_func,
+     ),
+ )
+
+ # Insert example text
+ with open("./book.txt", "r", encoding="utf-8") as f:
+     rag.insert(f.read())
+
+ # Test different query modes
+ print("\nNaive Search:")
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+ print("\nLocal Search:")
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+ print("\nGlobal Search:")
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+ print("\nHybrid Search:")
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
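Likewise for the LiteLLM variant, assuming a LiteLLM proxy is already listening on the configured URL (values shown are the script's own defaults or placeholders):

```bash
export LITELLM_URL=http://localhost:4000     # script default
export LITELLM_KEY=sk-1234                   # placeholder key
export LLM_MODEL=gpt-4o
export EMBEDDING_MODEL=embedding-model       # must match a model configured in the proxy
python examples/lightrag_api_llamaindex_litellm_demo_simplified.py
```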
lightrag/wrapper/Readme.md ADDED
@@ -0,0 +1,177 @@
+ ## Wrapper Directory
+
+ The `wrapper` directory contains integrations with different frameworks. These wrappers provide a consistent interface to LightRAG while abstracting away the specifics of each framework.
+
+ ## Wrapper Directory Structure
+
+ ```
+ lightrag/
+ ├── wrapper/                    # Wrappers for different model providers and frameworks
+ │   ├── llama_index_impl.py     # LlamaIndex integration for embeddings and completions
+ │   └── ...                     # Other framework wrappers
+ ├── kg/                         # Knowledge graph implementations
+ ├── utils/                      # Utility functions and helpers
+ └── ...
+ ```
+
+ Current wrappers:
+
+ 1. **LlamaIndex** (`wrapper/llama_index_impl.py`):
+    - Provides integration with OpenAI and other providers through LlamaIndex
+    - Supports both direct API access and proxy services like LiteLLM
+    - Handles embeddings and completions with consistent interfaces
+    - See example implementations:
+      - [Direct OpenAI Usage](../../examples/lightrag_api_llamaindex_direct_demo_simplified.py)
+      - [LiteLLM Proxy Usage](../../examples/lightrag_api_llamaindex_litellm_demo_simplified.py)
+
+ <details>
+ <summary> <b>Using LlamaIndex</b> </summary>
+
+ LightRAG supports LlamaIndex for embeddings and completions in two ways: direct OpenAI usage or through a LiteLLM proxy.
+
+ ### Setup
+
+ First, install the dependencies for the integration path you plan to use:
+ ```bash
+ # Direct OpenAI usage
+ pip install llama-index llama-index-llms-openai llama-index-embeddings-openai
+
+ # LiteLLM proxy usage
+ pip install llama-index llama-index-llms-litellm llama-index-embeddings-litellm
+ ```
+
+ ### Standard OpenAI Usage
+
+ ```python
+ from lightrag import LightRAG
+ from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
+ from llama_index.embeddings.openai import OpenAIEmbedding
+ from llama_index.llms.openai import OpenAI
+ from lightrag.utils import EmbeddingFunc, logger
+
+ # Initialize with direct OpenAI access
+ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+     try:
+         # Initialize OpenAI if not in kwargs
+         if "llm_instance" not in kwargs:
+             llm_instance = OpenAI(
+                 model="gpt-4",
+                 api_key="your-openai-key",
+                 temperature=0.7,
+             )
+             kwargs["llm_instance"] = llm_instance
+
+         response = await llama_index_complete_if_cache(
+             kwargs["llm_instance"],
+             prompt,
+             system_prompt=system_prompt,
+             history_messages=history_messages,
+             **kwargs,
+         )
+         return response
+     except Exception as e:
+         logger.error(f"LLM request failed: {str(e)}")
+         raise
+
+ # Initialize LightRAG with OpenAI
+ rag = LightRAG(
+     working_dir="your/path",
+     llm_model_func=llm_model_func,
+     embedding_func=EmbeddingFunc(
+         embedding_dim=1536,
+         max_token_size=8192,
+         func=lambda texts: llama_index_embed(
+             texts,
+             embed_model=OpenAIEmbedding(
+                 model="text-embedding-3-large",
+                 api_key="your-openai-key"
+             )
+         ),
+     ),
+ )
+ ```
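Once the instance is constructed, ingestion and querying follow the usual LightRAG flow; a minimal sketch mirroring the example scripts added in this commit (`book.txt` is a placeholder input file):

```python
from lightrag import QueryParam

# Index a document, then query it in hybrid mode
with open("./book.txt", "r", encoding="utf-8") as f:
    rag.insert(f.read())

print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
```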
+
+ ### Using LiteLLM Proxy
+
+ Using a LiteLLM proxy lets you:
+
+ 1. Use any LLM provider through LiteLLM
+ 2. Leverage LlamaIndex's embedding and completion capabilities
+ 3. Maintain consistent configuration across services
+
+ ```python
+ import os
+
+ from lightrag import LightRAG
+ from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
+ from llama_index.llms.litellm import LiteLLM
+ from llama_index.embeddings.litellm import LiteLLMEmbedding
+ from lightrag.utils import EmbeddingFunc, logger
+
+ # Proxy and model configuration (see Environment Variables below)
+ LITELLM_URL = os.environ.get("LITELLM_URL", "http://localhost:4000")
+ LITELLM_KEY = os.environ.get("LITELLM_KEY", "your-litellm-key")
+ LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4")
+ EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
+
+ # Initialize with LiteLLM proxy
+ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+     try:
+         # Initialize LiteLLM if not in kwargs
+         if "llm_instance" not in kwargs:
+             llm_instance = LiteLLM(
+                 model=f"openai/{LLM_MODEL}",  # Format: "provider/model_name"
+                 api_base=LITELLM_URL,
+                 api_key=LITELLM_KEY,
+                 temperature=0.7,
+             )
+             kwargs["llm_instance"] = llm_instance
+
+         response = await llama_index_complete_if_cache(
+             kwargs["llm_instance"],
+             prompt,
+             system_prompt=system_prompt,
+             history_messages=history_messages,
+             **kwargs,
+         )
+         return response
+     except Exception as e:
+         logger.error(f"LLM request failed: {str(e)}")
+         raise
+
+ # Initialize LightRAG with LiteLLM
+ rag = LightRAG(
+     working_dir="your/path",
+     llm_model_func=llm_model_func,
+     embedding_func=EmbeddingFunc(
+         embedding_dim=1536,
+         max_token_size=8192,
+         func=lambda texts: llama_index_embed(
+             texts,
+             embed_model=LiteLLMEmbedding(
+                 model_name=f"openai/{EMBEDDING_MODEL}",
+                 api_base=LITELLM_URL,
+                 api_key=LITELLM_KEY,
+             )
+         ),
+     ),
+ )
+ ```
+
+ ### Environment Variables
+
+ For OpenAI direct usage:
+ ```bash
+ OPENAI_API_KEY=your-openai-key
+ ```
+
+ For LiteLLM proxy:
+ ```bash
+ # LiteLLM Configuration
+ LITELLM_URL=http://litellm:4000
+ LITELLM_KEY=your-litellm-key
+
+ # Model Configuration
+ LLM_MODEL=gpt-4
+ EMBEDDING_MODEL=text-embedding-3-large
+ EMBEDDING_MAX_TOKEN_SIZE=8192
+ ```
+
+ ### Key Differences
+ 1. **Direct OpenAI**:
+    - Simpler setup
+    - Direct API access
+    - Requires OpenAI API key
+
+ 2. **LiteLLM Proxy**:
+    - Model provider agnostic
+    - Centralized API key management
+    - Support for multiple providers
+    - Better cost control and monitoring
+
+ </details>
lightrag/wrapper/__init__.py ADDED
File without changes
lightrag/wrapper/llama_index_impl.py ADDED
@@ -0,0 +1,207 @@
+ from typing import List, Optional
+
+ import numpy as np
+ import pipmaster as pm
+
+ # Install the LlamaIndex core package before importing from it
+ if not pm.is_installed("llama-index"):
+     pm.install("llama-index")
+
+ from llama_index.core.llms import (
+     ChatMessage,
+     MessageRole,
+     ChatResponse,
+ )
+ from llama_index.core.embeddings import BaseEmbedding
+ from llama_index.core.settings import Settings as LlamaIndexSettings
+ from tenacity import (
+     retry,
+     stop_after_attempt,
+     wait_exponential,
+     retry_if_exception_type,
+ )
+ from lightrag.utils import (
+     logger,
+     wrap_embedding_func_with_attrs,
+     locate_json_string_body_from_string,
+ )
+ from lightrag.exceptions import (
+     APIConnectionError,
+     RateLimitError,
+     APITimeoutError,
+ )
+
+
+ def configure_llama_index(settings: LlamaIndexSettings = None, **kwargs):
+     """
+     Configure LlamaIndex settings.
+
+     Args:
+         settings: LlamaIndex Settings instance. If None, uses default settings.
+         **kwargs: Additional settings to override/configure
+     """
+     if settings is None:
+         settings = LlamaIndexSettings()
+
+     # Update settings with any provided kwargs
+     for key, value in kwargs.items():
+         if hasattr(settings, key):
+             setattr(settings, key, value)
+         else:
+             logger.warning(f"Unknown LlamaIndex setting: {key}")
+
+     # Set as global settings
+     LlamaIndexSettings.set_global(settings)
+     return settings
+
+
+ def format_chat_messages(messages):
+     """Format chat messages into LlamaIndex format."""
+     formatted_messages = []
+
+     for msg in messages:
+         role = msg.get("role", "user")
+         content = msg.get("content", "")
+
+         if role == "system":
+             formatted_messages.append(
+                 ChatMessage(role=MessageRole.SYSTEM, content=content)
+             )
+         elif role == "assistant":
+             formatted_messages.append(
+                 ChatMessage(role=MessageRole.ASSISTANT, content=content)
+             )
+         elif role == "user":
+             formatted_messages.append(
+                 ChatMessage(role=MessageRole.USER, content=content)
+             )
+         else:
+             logger.warning(f"Unknown role {role}, treating as user message")
+             formatted_messages.append(
+                 ChatMessage(role=MessageRole.USER, content=content)
+             )
+
+     return formatted_messages
+
+
+ @retry(
+     stop=stop_after_attempt(3),
+     wait=wait_exponential(multiplier=1, min=4, max=60),
+     retry=retry_if_exception_type(
+         (RateLimitError, APIConnectionError, APITimeoutError)
+     ),
+ )
+ async def llama_index_complete_if_cache(
+     model: str,
+     prompt: str,
+     system_prompt: Optional[str] = None,
+     history_messages: List[dict] = [],
+     **kwargs,
+ ) -> str:
+     """Complete the prompt using LlamaIndex."""
+     try:
+         # Format messages for chat
+         formatted_messages = []
+
+         # Add system message if provided
+         if system_prompt:
+             formatted_messages.append(
+                 ChatMessage(role=MessageRole.SYSTEM, content=system_prompt)
+             )
+
+         # Add history messages
+         for msg in history_messages:
+             formatted_messages.append(
+                 ChatMessage(
+                     role=MessageRole.USER
+                     if msg["role"] == "user"
+                     else MessageRole.ASSISTANT,
+                     content=msg["content"],
+                 )
+             )
+
+         # Add current prompt
+         formatted_messages.append(ChatMessage(role=MessageRole.USER, content=prompt))
+
+         # Get LLM instance from kwargs
+         if "llm_instance" not in kwargs:
+             raise ValueError("llm_instance must be provided in kwargs")
+         llm = kwargs["llm_instance"]
+
+         # Get response
+         response: ChatResponse = await llm.achat(messages=formatted_messages)
+
+         # In newer versions, the response is in message.content
+         content = response.message.content
+         return content
+
+     except Exception as e:
+         logger.error(f"Error in llama_index_complete_if_cache: {str(e)}")
+         raise
+
+
+ async def llama_index_complete(
+     prompt,
+     system_prompt=None,
+     history_messages=None,
+     keyword_extraction=False,
+     settings: LlamaIndexSettings = None,
+     **kwargs,
+ ) -> str:
+     """
+     Main completion function for LlamaIndex
+
+     Args:
+         prompt: Input prompt
+         system_prompt: Optional system prompt
+         history_messages: Optional chat history
+         keyword_extraction: Whether to extract keywords from response
+         settings: Optional LlamaIndex settings
+         **kwargs: Additional arguments
+     """
+     if history_messages is None:
+         history_messages = []
+
+     # Strip a stray keyword_extraction entry from kwargs so it is not forwarded,
+     # without discarding the value passed via the named parameter above
+     kwargs.pop("keyword_extraction", None)
+     result = await llama_index_complete_if_cache(
+         kwargs.get("llm_instance"),
+         prompt,
+         system_prompt=system_prompt,
+         history_messages=history_messages,
+         **kwargs,
+     )
+     if keyword_extraction:
+         return locate_json_string_body_from_string(result)
+     return result
+
+
+ @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+ @retry(
+     stop=stop_after_attempt(3),
+     wait=wait_exponential(multiplier=1, min=4, max=60),
+     retry=retry_if_exception_type(
+         (RateLimitError, APIConnectionError, APITimeoutError)
+     ),
+ )
+ async def llama_index_embed(
+     texts: list[str],
+     embed_model: BaseEmbedding = None,
+     settings: LlamaIndexSettings = None,
+     **kwargs,
+ ) -> np.ndarray:
+     """
+     Generate embeddings using LlamaIndex
+
+     Args:
+         texts: List of texts to embed
+         embed_model: LlamaIndex embedding model
+         settings: Optional LlamaIndex settings
+         **kwargs: Additional arguments
+     """
+     if settings:
+         configure_llama_index(settings)
+
+     if embed_model is None:
+         raise ValueError("embed_model must be provided")
+
+     # Use _get_text_embeddings for batch processing
+     embeddings = embed_model._get_text_embeddings(texts)
+     return np.array(embeddings)
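For reference, the two entry points can also be exercised directly outside of a LightRAG instance; a minimal sketch (model names and the API key are placeholders, and the LLM must be supplied via `llm_instance` in kwargs as the implementation above requires):

```python
import asyncio

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed


async def main():
    # Embeddings: returns a numpy array of shape (num_texts, embedding_dim)
    embed_model = OpenAIEmbedding(model="text-embedding-3-small", api_key="your-openai-key")
    vectors = await llama_index_embed(["This is a test sentence."], embed_model=embed_model)
    print(vectors.shape)

    # Completion: the LLM instance is read from kwargs["llm_instance"]
    llm = OpenAI(model="gpt-4", api_key="your-openai-key")
    answer = await llama_index_complete_if_cache(
        llm,
        "Say hello in one short sentence.",
        llm_instance=llm,
    )
    print(answer)


asyncio.run(main())
```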