Tan665 committed on
Commit
5a3ba21
·
1 Parent(s): 0649cc6

added nvidia text-embedding API and example of using nvidia API llm and text-embedding

Browse files
Files changed (2) hide show
  1. examples/lightrag_nvidia_demo.py +159 -0
  2. lightrag/llm.py +40 -0
examples/lightrag_nvidia_demo.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ from lightrag import LightRAG, QueryParam
4
+ from lightrag.llm import openai_complete_if_cache, nvidia_openai_embedding, nvidia_openai_complete
5
+ from lightrag.utils import EmbeddingFunc
6
+ import numpy as np
7
+
8
+ #for custom llm_model_func
9
+ from lightrag.utils import locate_json_string_body_from_string
10
+
11
WORKING_DIR = "./dickens"

# exist_ok=True replaces the exists()-then-mkdir() pair, which could race
# with another process creating the directory between the two calls.
os.makedirs(WORKING_DIR, exist_ok=True)

# Prefer the NVIDIA_OPENAI_API_KEY environment variable so a real key never
# has to be written into this file; the placeholder is kept as the fallback
# so the script behaves as before when the variable is unset.
NVIDIA_OPENAI_API_KEY = os.getenv("NVIDIA_OPENAI_API_KEY", "nvapi-xxxx")  # your api key
19
+
20
+ # using pre-defined function for nvidia LLM API. OpenAI compatible
21
+ # llm_model_func = nvidia_openai_complete
22
+
23
+ # If you want to write a custom llm_model_func for an LLM served by the NVIDIA API, as in the other examples:
24
async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Complete ``prompt`` with the NVIDIA-hosted nemotron model.

    The request is sent through ``openai_complete_if_cache`` with the NVIDIA
    base URL and the module-level ``NVIDIA_OPENAI_API_KEY``.

    Args:
        prompt: User prompt forwarded to the model.
        system_prompt: Optional system prompt, forwarded unchanged.
        history_messages: Earlier conversation messages; ``None`` (the
            default) means no history.
        keyword_extraction: When true, the completion is passed through
            ``locate_json_string_body_from_string`` before being returned.
        **kwargs: Extra keyword arguments forwarded to
            ``openai_complete_if_cache``.

    Returns:
        The completion, or the result of
        ``locate_json_string_body_from_string`` when ``keyword_extraction``
        is set.
    """
    result = await openai_complete_if_cache(
        "nvidia/llama-3.1-nemotron-70b-instruct",
        prompt,
        system_prompt=system_prompt,
        # Build a fresh list per call: a ``[]`` default in the signature is a
        # single object shared by every call of this function.
        history_messages=[] if history_messages is None else history_messages,
        api_key=NVIDIA_OPENAI_API_KEY,
        base_url="https://integrate.api.nvidia.com/v1",
        **kwargs,
    )
    if keyword_extraction:
        return locate_json_string_body_from_string(result)
    return result
39
+
40
+ #custom embedding
41
+ nvidia_embed_model = "nvidia/nv-embedqa-e5-v5"
42
async def indexing_embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with ``input_type="passage"``, for use while indexing."""
    request_options = {
        # maximum 512 tokens; "nvidia/llama-3.2-nv-embedqa-1b-v1" is an alternative
        "model": nvidia_embed_model,
        "api_key": NVIDIA_OPENAI_API_KEY,
        "base_url": "https://integrate.api.nvidia.com/v1",
        "input_type": "passage",
        # let the server handle inputs longer than the maximum token count
        "trunc": "END",
        "encode": "float",
    }
    vectors = await nvidia_openai_embedding(texts, **request_options)
    return vectors
53
+
54
async def query_embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with ``input_type="query"``, for use at query time."""
    request_options = {
        # maximum 512 tokens; "nvidia/llama-3.2-nv-embedqa-1b-v1" is an alternative
        "model": nvidia_embed_model,
        "api_key": NVIDIA_OPENAI_API_KEY,
        "base_url": "https://integrate.api.nvidia.com/v1",
        "input_type": "query",
        # let the server handle inputs longer than the maximum token count
        "trunc": "END",
        "encode": "float",
    }
    vectors = await nvidia_openai_embedding(texts, **request_options)
    return vectors
65
+
66
+ # the dimension is the same for indexing and query embeddings
67
async def get_embedding_dim():
    """Return the embedding dimension by embedding one probe sentence."""
    probe = await indexing_embedding_func(["This is a test sentence."])
    # The second axis of the returned array is taken as the dimension.
    return probe.shape[1]
72
+
73
+
74
+ # function test
75
async def test_funcs():
    """Call the LLM and the indexing embedding function once and print both results."""
    completion = await llm_model_func("How are you?")
    print("llm_model_func: ", completion)

    vectors = await indexing_embedding_func(["How are you?"])
    print("embedding_func: ", vectors)
81
+
82
+
83
+ # asyncio.run(test_funcs())
84
+
85
+
86
async def main():
    """Index ``./book.txt`` and run the same question in four query modes.

    The LightRAG instance is built twice on the same working directory: once
    with the passage embedding function for insertion, then again with the
    query embedding function for the searches. Any exception is caught and
    printed.
    """

    def build_rag(embed, dim):
        # max_token_size is 512, yet inputs can still exceed the model's token
        # limit; the ``trunc`` argument of the embedding functions handles
        # that. Aligning LightRAG's tokenizer with the NVIDIA model is future
        # work.
        return LightRAG(
            working_dir=WORKING_DIR,
            llm_model_func=llm_model_func,
            embedding_func=EmbeddingFunc(
                embedding_dim=dim,
                max_token_size=512,
                func=embed,
            ),
        )

    question = "What are the top themes in this story?"
    searches = (
        ("==============Naive===============", "naive"),
        ("==============local===============", "local"),
        ("==============global===============", "global"),
        ("==============hybrid===============", "hybrid"),
    )

    try:
        embedding_dimension = await get_embedding_dim()
        print(f"Detected embedding dimension: {embedding_dimension}")

        # Indexing phase: passage-type embeddings.
        rag = build_rag(indexing_embedding_func, embedding_dimension)
        with open("./book.txt", "r", encoding="utf-8") as f:
            await rag.ainsert(f.read())

        # Query phase: rebuild so the embeddings use the query type.
        rag = build_rag(query_embedding_func, embedding_dimension)

        for banner, mode in searches:
            print(banner)
            print(await rag.aquery(question, param=QueryParam(mode=mode)))
    except Exception as e:
        print(f"An error occurred: {e}")
156
+
157
+
158
+ if __name__ == "__main__":
159
+ asyncio.run(main())
lightrag/llm.py CHANGED
@@ -502,6 +502,20 @@ async def gpt_4o_mini_complete(
502
  **kwargs,
503
  )
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
 
506
  async def azure_openai_complete(
507
  prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
@@ -588,6 +602,32 @@ async def openai_embedding(
588
  return np.array([dp.embedding for dp in response.data])
589
 
590
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
592
  @retry(
593
  stop=stop_after_attempt(3),
 
502
  **kwargs,
503
  )
504
 
505
async def nvidia_openai_complete(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Complete ``prompt`` with NVIDIA's nemotron model via the OpenAI-style client.

    Args:
        prompt: User prompt forwarded to the model.
        system_prompt: Optional system prompt, forwarded unchanged.
        history_messages: Earlier conversation messages; ``None`` (the
            default) means no history.
        keyword_extraction: When true, the completion is passed through
            ``locate_json_string_body_from_string`` before being returned.
        **kwargs: Extra keyword arguments forwarded to
            ``openai_complete_if_cache``.

    Returns:
        The completion, or the result of
        ``locate_json_string_body_from_string`` when ``keyword_extraction``
        is set.
    """
    result = await openai_complete_if_cache(
        "nvidia/llama-3.1-nemotron-70b-instruct",  # context length 128k
        prompt,
        system_prompt=system_prompt,
        # Build a fresh list per call: a ``[]`` default in the signature is a
        # single object shared by every call of this function.
        history_messages=[] if history_messages is None else history_messages,
        base_url="https://integrate.api.nvidia.com/v1",
        **kwargs,
    )
    if keyword_extraction:  # TODO: use JSON API
        return locate_json_string_body_from_string(result)
    return result
519
 
520
  async def azure_openai_complete(
521
  prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
 
602
  return np.array([dp.embedding for dp in response.data])
603
 
604
 
605
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
async def nvidia_openai_embedding(
    texts: list[str],
    # refer to https://build.nvidia.com/nim?filters=usecase%3Ausecase_text_to_embedding
    model: str = "nvidia/llama-3.2-nv-embedqa-1b-v1",
    base_url: str = "https://integrate.api.nvidia.com/v1",
    api_key: str = None,
    input_type: str = "passage",
    trunc: str = "NONE",
    encode: str = "float",
) -> np.ndarray:
    """Embed ``texts`` through NVIDIA's embeddings endpoint.

    Args:
        texts: Strings to embed.
        model: Embedding model identifier.
        base_url: API endpoint; ``None`` uses the client's default.
        api_key: When truthy, written to the ``OPENAI_API_KEY`` environment
            variable before the client is created.
        input_type: ``"query"`` for retrieval, ``"passage"`` for embedding.
        trunc: ``"NONE"``, ``"START"`` or ``"END"``; sent as ``truncate``.
        encode: ``"float"`` or ``"base64"``; sent as ``encoding_format``.

    Returns:
        An array built from the ``embedding`` of each item in the response.
    """
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key

    if base_url is None:
        client = AsyncOpenAI()
    else:
        client = AsyncOpenAI(base_url=base_url)

    # Fields sent through ``extra_body`` rather than as named arguments.
    nvidia_fields = {"input_type": input_type, "truncate": trunc}
    response = await client.embeddings.create(
        model=model,
        input=texts,
        encoding_format=encode,
        extra_body=nvidia_fields,
    )
    return np.array([item.embedding for item in response.data])
630
+
631
  @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
632
  @retry(
633
  stop=stop_after_attempt(3),