# lightrag/examples/query_keyword_separation_example.py
import asyncio
import logging
import os
import shutil

import numpy as np
from dotenv import load_dotenv
from openai import AzureOpenAI

from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc

logging.basicConfig(level=logging.INFO)
load_dotenv()
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_EMBEDDING_DEPLOYMENT")
AZURE_EMBEDDING_API_VERSION = os.getenv("AZURE_EMBEDDING_API_VERSION")
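
# The variables above are read from a `.env` file. A hypothetical example
# (placeholder values, not real credentials or guaranteed model names):
#
#   AZURE_OPENAI_API_VERSION=2024-02-01
#   AZURE_OPENAI_DEPLOYMENT=gpt-4o
#   AZURE_OPENAI_API_KEY=<your-key>
#   AZURE_OPENAI_ENDPOINT=https://<your-resource>.openai.azure.com/
#   AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
#   AZURE_EMBEDDING_API_VERSION=2023-05-15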
WORKING_DIR = "./dickens"

# Start from a clean working directory so the example rebuilds its index.
if os.path.exists(WORKING_DIR):
    shutil.rmtree(WORKING_DIR)
os.mkdir(WORKING_DIR)


async def llm_model_func(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,  # accepted for LightRAG's calling convention; unused here
    **kwargs,
) -> str:
    client = AzureOpenAI(
        api_key=AZURE_OPENAI_API_KEY,
        api_version=AZURE_OPENAI_API_VERSION,
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
    )

    # Assemble the chat transcript: system prompt, prior turns, then the new prompt.
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    if history_messages:
        messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})

    chat_completion = client.chat.completions.create(
        model=AZURE_OPENAI_DEPLOYMENT,  # on Azure, "model" is the deployment name
        messages=messages,
        temperature=kwargs.get("temperature", 0),
        top_p=kwargs.get("top_p", 1),
        n=kwargs.get("n", 1),
    )
    return chat_completion.choices[0].message.content


async def embedding_func(texts: list[str]) -> np.ndarray:
    client = AzureOpenAI(
        api_key=AZURE_OPENAI_API_KEY,
        api_version=AZURE_EMBEDDING_API_VERSION,
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
    )
    # Embed the whole batch in one request and return a (len(texts), dim) array.
    response = client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
    return np.array([item.embedding for item in response.data])


async def test_funcs():
    # Smoke-test both functions before wiring them into LightRAG.
    result = await llm_model_func("How are you?")
    print("llm_model_func response:", result)

    result = await embedding_func(["How are you?"])
    print("embedding_func result shape:", result.shape)
    print("Embedding dimension:", result.shape[1])


asyncio.run(test_funcs())
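
# Must match the dimensionality of the deployed embedding model
# (3072 is the output size of OpenAI's text-embedding-3-large, for example).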
embedding_dimension = 3072
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=embedding_dimension,
max_token_size=8192,
func=embedding_func,
),
)
# Read both source documents and index them in a single insert call.
with open("./book_1.txt", encoding="utf-8") as book1, open(
    "./book_2.txt", encoding="utf-8"
) as book2:
    rag.insert([book1.read(), book2.read()])
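# Note: insert() chunks the documents, uses the LLM to extract entities and
# relations, and builds LightRAG's vector and graph indexes, so this step
# issues many LLM calls and can take a while on full-length books.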


# Demonstrates query_with_separate_keyword_extraction: keyword extraction runs
# on the query alone, so the presentation instructions in `prompt` do not
# pollute the extracted keywords.
def run_example():
    query = "What are the top themes in this story?"
    prompt = "Please simplify the response for a young audience."

    # Keyword extraction is applied only to `query`; `prompt` only shapes the answer.
    response = rag.query_with_separate_keyword_extraction(
        query=query,
        prompt=prompt,
        param=QueryParam(mode="hybrid"),  # adjust the retrieval mode as needed
    )
    print("Extracted Response:", response)

if __name__ == "__main__":
    run_example()