File size: 2,619 Bytes
046051b
 
 
 
 
 
 
 
df22b26
046051b
 
 
df22b26
046051b
 
 
 
 
 
 
 
 
 
df22b26
046051b
 
df22b26
046051b
 
 
 
 
df22b26
046051b
 
df22b26
7441782
 
 
 
 
 
 
046051b
 
 
 
 
 
 
 
df22b26
7441782
 
 
 
 
 
046051b
7441782
 
 
 
 
 
 
046051b
 
7441782
 
046051b
7441782
 
 
 
046051b
7441782
 
 
 
046051b
7441782
 
 
 
046051b
7441782
 
 
 
 
 
046051b
7441782
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
import asyncio
from lightrag import LightRAG, QueryParam
from lightrag.llm import openai_complete_if_cache, openai_embedding
from lightrag.utils import EmbeddingFunc
import numpy as np

# Directory handed to LightRAG as its working_dir in main().
WORKING_DIR = "./dickens"

# exist_ok=True avoids the check-then-create race of
# `if not os.path.exists(...): os.mkdir(...)` and is a no-op when the
# directory is already there.
os.makedirs(WORKING_DIR, exist_ok=True)


async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, **kwargs
) -> str:
    """Send a completion request for the "solar-mini" model via the Upstage endpoint.

    Args:
        prompt: The user prompt, forwarded as-is.
        system_prompt: Optional system prompt, forwarded as-is.
        history_messages: Prior conversation messages. ``None`` (the default)
            is treated as an empty history.
        **kwargs: Extra keyword arguments passed straight through to
            ``openai_complete_if_cache``.

    Returns:
        Whatever ``openai_complete_if_cache`` returns (annotated as ``str``).
    """
    # Build a fresh list per call instead of using a mutable default argument,
    # which would be one shared list object across every invocation.
    if history_messages is None:
        history_messages = []

    return await openai_complete_if_cache(
        "solar-mini",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        # The API key is read from the environment at call time; os.getenv
        # yields None when UPSTAGE_API_KEY is unset.
        api_key=os.getenv("UPSTAGE_API_KEY"),
        base_url="https://api.upstage.ai/v1/solar",
        **kwargs,
    )


async def embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the "solar-embedding-1-large-query" model.

    The request goes through ``openai_embedding`` against the Upstage
    endpoint, using the key found in the ``UPSTAGE_API_KEY`` environment
    variable.

    Args:
        texts: The strings to embed.

    Returns:
        The result of ``openai_embedding`` (annotated as ``np.ndarray``;
        presumably one row per input text -- confirm against the library).
    """
    request_options = {
        "model": "solar-embedding-1-large-query",
        "api_key": os.getenv("UPSTAGE_API_KEY"),
        "base_url": "https://api.upstage.ai/v1/solar",
    }
    vectors = await openai_embedding(texts, **request_options)
    return vectors


async def get_embedding_dim():
    """Return the embedding width by embedding a single probe sentence.

    The width is read from the second axis (``shape[1]``) of the array that
    ``embedding_func`` returns for a one-element input.
    """
    probe = await embedding_func(["This is a test sentence."])
    return probe.shape[1]


# Manual smoke test for llm_model_func and embedding_func
async def test_funcs():
    """Call both backend wrappers once with a fixed question and print the results."""
    question = "How are you?"

    llm_reply = await llm_model_func(question)
    print("llm_model_func: ", llm_reply)

    vectors = await embedding_func([question])
    print("embedding_func: ", vectors)


# asyncio.run(test_funcs())

async def main():
    """Index ./book.txt with LightRAG and print answers for each query mode.

    Steps:
      1. Detect the embedding dimension with ``get_embedding_dim``.
      2. Build a ``LightRAG`` instance rooted at ``WORKING_DIR``.
      3. Insert the full text of ``./book.txt``.
      4. Ask the same question in "naive", "local", "global" and "hybrid"
         modes, printing each answer.

    Any exception raised along the way is caught and reported on stdout
    rather than propagated, as this is the top-level boundary of the demo.
    """
    question = "What are the top themes in this story?"

    try:
        embedding_dimension = await get_embedding_dim()
        print(f"Detected embedding dimension: {embedding_dimension}")

        rag = LightRAG(
            working_dir=WORKING_DIR,
            llm_model_func=llm_model_func,
            embedding_func=EmbeddingFunc(
                embedding_dim=embedding_dimension,
                max_token_size=8192,
                func=embedding_func,
            ),
        )

        with open("./book.txt", "r", encoding="utf-8") as f:
            book_text = f.read()

        # This coroutine already runs inside the loop started by asyncio.run(),
        # so the awaitable ainsert/aquery variants are used. The synchronous
        # insert()/query() wrappers drive an event loop themselves, which
        # fails with "This event loop is already running" when called here.
        await rag.ainsert(book_text)

        # Same question, one retrieval mode at a time, in the original order.
        for mode in ("naive", "local", "global", "hybrid"):
            print(await rag.aquery(question, param=QueryParam(mode=mode)))
    except Exception as e:
        print(f"An error occurred: {e}")

# Script entry point: run the async demo only when executed directly.
if __name__ == "__main__":
    demo = main()
    asyncio.run(demo)