File size: 6,138 Bytes
4460ba5 8b3b01c ca6e075 4460ba5 9ce8151 8b3b01c ca6e075 4460ba5 8f067b7 4460ba5 77d8960 ca6e075 77d8960 ca6e075 77d8960 ca6e075 77d8960 4460ba5 275e33e 8b3b01c 275e33e 28e64e8 4587a99 79f312f 4587a99 79f312f 4587a99 79f312f 275e33e 4460ba5 79f312f 275e33e 4460ba5 79f312f d3427f3 79f312f 275e33e 4460ba5 79f312f d3427f3 79f312f 275e33e 79f312f 4460ba5 275e33e 8b3b01c ca6e075 28e64e8 79f312f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import os
import asyncio
import logging
import logging.config
from lightrag import LightRAG, QueryParam
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import logger, set_verbose_debug
WORKING_DIR = "./dickens"
def configure_logging():
"""Configure logging for the application"""
# Reset any existing handlers to ensure clean configuration
for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
logger_instance = logging.getLogger(logger_name)
logger_instance.handlers = []
logger_instance.filters = []
# Get log directory path from environment variable or use current directory
log_dir = os.getenv("LOG_DIR", os.getcwd())
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag_demo.log"))
print(f"\nLightRAG demo log file: {log_file_path}\n")
os.makedirs(os.path.dirname(log_dir), exist_ok=True)
# Get log file max size and backup count from environment variables
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
logging.config.dictConfig(
{
"version": 1,
"disable_existing_loggers": False,
"formatters": {
"default": {
"format": "%(levelname)s: %(message)s",
},
"detailed": {
"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
},
},
"handlers": {
"console": {
"formatter": "default",
"class": "logging.StreamHandler",
"stream": "ext://sys.stderr",
},
"file": {
"formatter": "detailed",
"class": "logging.handlers.RotatingFileHandler",
"filename": log_file_path,
"maxBytes": log_max_bytes,
"backupCount": log_backup_count,
"encoding": "utf-8",
},
},
"loggers": {
"lightrag": {
"handlers": ["console", "file"],
"level": "INFO",
"propagate": False,
},
},
}
)
# Set the logger level to INFO
logger.setLevel(logging.INFO)
# Enable verbose debug if needed
set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
async def initialize_rag():
rag = LightRAG(
working_dir=WORKING_DIR,
embedding_func=openai_embed,
llm_model_func=gpt_4o_mini_complete,
)
await rag.initialize_storages()
await initialize_pipeline_status()
return rag
async def main():
# Check if OPENAI_API_KEY environment variable exists
if not os.getenv("OPENAI_API_KEY"):
print(
"Error: OPENAI_API_KEY environment variable is not set. Please set this variable before running the program."
)
print("You can set the environment variable by running:")
print(" export OPENAI_API_KEY='your-openai-api-key'")
return # Exit the async function
try:
# Clear old data files
files_to_delete = [
"graph_chunk_entity_relation.graphml",
"kv_store_doc_status.json",
"kv_store_full_docs.json",
"kv_store_text_chunks.json",
"vdb_chunks.json",
"vdb_entities.json",
"vdb_relationships.json",
]
for file in files_to_delete:
file_path = os.path.join(WORKING_DIR, file)
if os.path.exists(file_path):
os.remove(file_path)
print(f"Deleting old file:: {file_path}")
# Initialize RAG instance
rag = await initialize_rag()
# Test embedding function
test_text = ["This is a test string for embedding."]
embedding = await rag.embedding_func(test_text)
embedding_dim = embedding.shape[1]
print("\n=======================")
print("Test embedding function")
print("========================")
print(f"Test dict: {test_text}")
print(f"Detected embedding dimension: {embedding_dim}\n\n")
with open("./book.txt", "r", encoding="utf-8") as f:
await rag.ainsert(f.read())
# Perform naive search
print("\n=====================")
print("Query mode: naive")
print("=====================")
print(
await rag.aquery(
"What are the top themes in this story?", param=QueryParam(mode="naive")
)
)
# Perform local search
print("\n=====================")
print("Query mode: local")
print("=====================")
print(
await rag.aquery(
"What are the top themes in this story?", param=QueryParam(mode="local")
)
)
# Perform global search
print("\n=====================")
print("Query mode: global")
print("=====================")
print(
await rag.aquery(
"What are the top themes in this story?",
param=QueryParam(mode="global"),
)
)
# Perform hybrid search
print("\n=====================")
print("Query mode: hybrid")
print("=====================")
print(
await rag.aquery(
"What are the top themes in this story?",
param=QueryParam(mode="hybrid"),
)
)
except Exception as e:
print(f"An error occurred: {e}")
finally:
if rag:
await rag.finalize_storages()
if __name__ == "__main__":
# Configure logging before running the main function
configure_logging()
asyncio.run(main())
print("\nDone!")
|