Larfii
committed on
Commit
Β·
164ec61
1
Parent(s):
38e1d84
update
Browse files- README.md +168 -3
- examples/batch_eval.py +2 -2
- examples/generate_query.py +1 -1
- examples/insert.py +1 -1
- examples/query.py +1 -1
- lightrag/__pycache__/__init__.cpython-310.pyc +0 -0
- lightrag/__pycache__/base.cpython-310.pyc +0 -0
- lightrag/__pycache__/lightrag.cpython-310.pyc +0 -0
- lightrag/__pycache__/llm.cpython-310.pyc +0 -0
- lightrag/__pycache__/myrag.cpython-310.pyc +0 -0
- lightrag/__pycache__/operate.cpython-310.pyc +0 -0
- lightrag/__pycache__/prompt.cpython-310.pyc +0 -0
- lightrag/__pycache__/storage.cpython-310.pyc +0 -0
- lightrag/__pycache__/utils.cpython-310.pyc +0 -0
- setup.py +1 -1
README.md
CHANGED
@@ -1,6 +1,171 @@
|
|
1 |
-
# LightRAG
|
|
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
## Citation
|
4 |
-
## Acknowledgement
|
5 |
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# LightRAG: Simple and Fast Retrieval-Augmented Generation
|
2 |
+
<img src='' />
|
3 |
|
4 |
+
<a href='https://github.com/HKUDS/GraphEdit'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
|
5 |
+
<a href='https://arxiv.org/abs/'><img src='https://img.shields.io/badge/arXiv--b31b1b'></a>
|
6 |
+
|
7 |
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
|
8 |
+
## Install
|
9 |
+
|
10 |
+
* Install from source
|
11 |
+
|
12 |
+
```
|
13 |
+
cd LightRAG
|
14 |
+
pip install -e .
|
15 |
+
```
|
16 |
+
* Install from PyPI
|
17 |
+
```
|
18 |
+
pip install lightrag-hku
|
19 |
+
```
|
20 |
+
|
21 |
+
## Quick Start
|
22 |
+
|
23 |
+
* Set OpenAI API key in environment: `export OPENAI_API_KEY="sk-..."`.
|
24 |
+
* Download the demo text "A Christmas Carol by Charles Dickens"
|
25 |
+
```
|
26 |
+
curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
|
27 |
+
```
|
28 |
+
Use the Python snippet below:
|
29 |
+
|
30 |
+
```
|
31 |
+
from lightrag import LightRAG, QueryParam
|
32 |
+
|
33 |
+
rag = LightRAG(working_dir="./dickens")
|
34 |
+
|
35 |
+
with open("./book.txt") as f:
|
36 |
+
rag.insert(f.read())
|
37 |
+
|
38 |
+
# Perform naive search
|
39 |
+
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
|
40 |
+
|
41 |
+
# Perform local search
|
42 |
+
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
|
43 |
+
|
44 |
+
# Perform global search
|
45 |
+
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
|
46 |
+
|
47 |
+
# Perform hybird search
|
48 |
+
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybird")))
|
49 |
+
```
|
50 |
+
Batch Insert
|
51 |
+
```
|
52 |
+
rag.insert(["TEXT1", "TEXT2",...])
|
53 |
+
```
|
54 |
+
Incremental Insert
|
55 |
+
|
56 |
+
```
|
57 |
+
rag = LightRAG(working_dir="./dickens")
|
58 |
+
|
59 |
+
with open("./newText.txt") as f:
|
60 |
+
rag.insert(f.read())
|
61 |
+
```
|
62 |
+
## Evaluation
|
63 |
+
### Dataset
|
64 |
+
The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
|
65 |
+
|
66 |
+
### Generate Query
|
67 |
+
LightRAG uses the following prompt to generate high-level queries, with the corresponding code located in `examples/generate_query.py`.
|
68 |
+
```
|
69 |
+
Given the following description of a dataset:
|
70 |
+
|
71 |
+
{description}
|
72 |
+
|
73 |
+
Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
|
74 |
+
|
75 |
+
Output the results in the following structure:
|
76 |
+
- User 1: [user description]
|
77 |
+
- Task 1: [task description]
|
78 |
+
- Question 1:
|
79 |
+
- Question 2:
|
80 |
+
- Question 3:
|
81 |
+
- Question 4:
|
82 |
+
- Question 5:
|
83 |
+
- Task 2: [task description]
|
84 |
+
...
|
85 |
+
- Task 5: [task description]
|
86 |
+
- User 2: [user description]
|
87 |
+
...
|
88 |
+
- User 5: [user description]
|
89 |
+
...
|
90 |
+
```
|
91 |
+
|
92 |
+
### Batch Eval
|
93 |
+
To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `examples/batch_eval.py`.
|
94 |
+
```
|
95 |
+
---Role---
|
96 |
+
You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
|
97 |
+
---Goal---
|
98 |
+
You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
|
99 |
+
|
100 |
+
- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
|
101 |
+
- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
|
102 |
+
- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
|
103 |
+
|
104 |
+
For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
|
105 |
+
|
106 |
+
Here is the question:
|
107 |
+
{query}
|
108 |
+
|
109 |
+
Here are the two answers:
|
110 |
+
|
111 |
+
**Answer 1:**
|
112 |
+
{answer1}
|
113 |
+
|
114 |
+
**Answer 2:**
|
115 |
+
{answer2}
|
116 |
+
|
117 |
+
Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
|
118 |
+
|
119 |
+
Output your evaluation in the following JSON format:
|
120 |
+
|
121 |
+
{{
|
122 |
+
"Comprehensiveness": {{
|
123 |
+
"Winner": "[Answer 1 or Answer 2]",
|
124 |
+
"Explanation": "[Provide explanation here]"
|
125 |
+
}},
|
126 |
+
"Empowerment": {{
|
127 |
+
"Winner": "[Answer 1 or Answer 2]",
|
128 |
+
"Explanation": "[Provide explanation here]"
|
129 |
+
}},
|
130 |
+
"Overall Winner": {{
|
131 |
+
"Winner": "[Answer 1 or Answer 2]",
|
132 |
+
"Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
|
133 |
+
}}
|
134 |
+
}}
|
135 |
+
```
|
136 |
+
## Code Structure
|
137 |
+
|
138 |
+
```
|
139 |
+
.
|
140 |
+
βββ examples
|
141 |
+
β βββ batch_eval.py
|
142 |
+
β βββ generate_query.py
|
143 |
+
β βββ insert.py
|
144 |
+
β βββ query.py
|
145 |
+
βββ lightrag
|
146 |
+
β βββ __init__.py
|
147 |
+
β βββ base.py
|
148 |
+
β βββ lightrag.py
|
149 |
+
β βββ llm.py
|
150 |
+
β βββ operate.py
|
151 |
+
β βββ prompt.py
|
152 |
+
β βββ storage.py
|
153 |
+
β βββ utils.py
|
154 |
+
βββ LICENSE
|
155 |
+
βββ README.md
|
156 |
+
βββ requirements.txt
|
157 |
+
βββ setup.py
|
158 |
+
```
|
159 |
## Citation
|
|
|
160 |
|
161 |
+
```
|
162 |
+
@article{guo2024lightrag,
|
163 |
+
title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
|
164 |
+
author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
|
165 |
+
year={2024},
|
166 |
+
eprint={},
|
167 |
+
archivePrefix={arXiv},
|
168 |
+
primaryClass={cs.IR}
|
169 |
+
}
|
170 |
+
```
|
171 |
+
|
examples/batch_eval.py
CHANGED
@@ -6,8 +6,8 @@ import jsonlines
|
|
6 |
from openai import OpenAI
|
7 |
|
8 |
|
9 |
-
def batch_eval(query_file, result1_file, result2_file, output_file_path
|
10 |
-
client = OpenAI(
|
11 |
|
12 |
with open(query_file, 'r') as f:
|
13 |
data = f.read()
|
|
|
6 |
from openai import OpenAI
|
7 |
|
8 |
|
9 |
+
def batch_eval(query_file, result1_file, result2_file, output_file_path):
|
10 |
+
client = OpenAI()
|
11 |
|
12 |
with open(query_file, 'r') as f:
|
13 |
data = f.read()
|
examples/generate_query.py
CHANGED
@@ -2,7 +2,7 @@ import os
|
|
2 |
|
3 |
from openai import OpenAI
|
4 |
|
5 |
-
os.environ["OPENAI_API_KEY"] = ""
|
6 |
|
7 |
def openai_complete_if_cache(
|
8 |
model="gpt-4o-mini", prompt=None, system_prompt=None, history_messages=[], **kwargs
|
|
|
2 |
|
3 |
from openai import OpenAI
|
4 |
|
5 |
+
# os.environ["OPENAI_API_KEY"] = ""
|
6 |
|
7 |
def openai_complete_if_cache(
|
8 |
model="gpt-4o-mini", prompt=None, system_prompt=None, history_messages=[], **kwargs
|
examples/insert.py
CHANGED
@@ -3,7 +3,7 @@ import sys
|
|
3 |
|
4 |
from lightrag import LightRAG
|
5 |
|
6 |
-
os.environ["OPENAI_API_KEY"] = ""
|
7 |
|
8 |
WORKING_DIR = ""
|
9 |
|
|
|
3 |
|
4 |
from lightrag import LightRAG
|
5 |
|
6 |
+
# os.environ["OPENAI_API_KEY"] = ""
|
7 |
|
8 |
WORKING_DIR = ""
|
9 |
|
examples/query.py
CHANGED
@@ -3,7 +3,7 @@ import sys
|
|
3 |
|
4 |
from lightrag import LightRAG, QueryParam
|
5 |
|
6 |
-
os.environ["OPENAI_API_KEY"] = ""
|
7 |
|
8 |
WORKING_DIR = ""
|
9 |
|
|
|
3 |
|
4 |
from lightrag import LightRAG, QueryParam
|
5 |
|
6 |
+
# os.environ["OPENAI_API_KEY"] = ""
|
7 |
|
8 |
WORKING_DIR = ""
|
9 |
|
lightrag/__pycache__/__init__.cpython-310.pyc
DELETED
Binary file (307 Bytes)
|
|
lightrag/__pycache__/base.cpython-310.pyc
DELETED
Binary file (5.5 kB)
|
|
lightrag/__pycache__/lightrag.cpython-310.pyc
DELETED
Binary file (7.6 kB)
|
|
lightrag/__pycache__/llm.cpython-310.pyc
DELETED
Binary file (2.52 kB)
|
|
lightrag/__pycache__/myrag.cpython-310.pyc
DELETED
Binary file (7.89 kB)
|
|
lightrag/__pycache__/operate.cpython-310.pyc
DELETED
Binary file (23.6 kB)
|
|
lightrag/__pycache__/prompt.cpython-310.pyc
DELETED
Binary file (17.6 kB)
|
|
lightrag/__pycache__/storage.cpython-310.pyc
DELETED
Binary file (12.2 kB)
|
|
lightrag/__pycache__/utils.cpython-310.pyc
DELETED
Binary file (6.98 kB)
|
|
setup.py
CHANGED
@@ -21,7 +21,7 @@ with open("./requirements.txt") as f:
|
|
21 |
deps.append(line.strip())
|
22 |
|
23 |
setuptools.setup(
|
24 |
-
name="
|
25 |
url=vars2readme["__url__"],
|
26 |
version=vars2readme["__version__"],
|
27 |
author=vars2readme["__author__"],
|
|
|
21 |
deps.append(line.strip())
|
22 |
|
23 |
setuptools.setup(
|
24 |
+
name="light-rag",
|
25 |
url=vars2readme["__url__"],
|
26 |
version=vars2readme["__version__"],
|
27 |
author=vars2readme["__author__"],
|