|
""" |
|
Example of directly using modal processors |
|
|
|
This example demonstrates how to use LightRAG's modal processors directly without going through MinerU. |
|
""" |
|
|
|
import asyncio |
|
import argparse |
|
from lightrag.llm.openai import openai_complete_if_cache, openai_embed |
|
from lightrag.kg.shared_storage import initialize_pipeline_status |
|
from lightrag import LightRAG |
|
from lightrag.utils import EmbeddingFunc |
|
from raganything.modalprocessors import ( |
|
ImageModalProcessor, |
|
TableModalProcessor, |
|
EquationModalProcessor, |
|
) |
|
|
|
WORKING_DIR = "./rag_storage" |
|
|
|
|
|
def get_llm_model_func(api_key: str, base_url: str = None):
    """Build a text-completion callable bound to the given OpenAI credentials.

    Args:
        api_key: OpenAI API key.
        base_url: Optional custom API endpoint (e.g. an Azure/proxy URL).

    Returns:
        A callable with signature
        ``(prompt, system_prompt=None, history_messages=None, **kwargs)``
        that forwards to ``openai_complete_if_cache`` using the
        "gpt-4o-mini" model.
    """

    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # Use a None sentinel instead of a mutable default ([]) so the
        # history list is not shared across calls.
        return openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages if history_messages is not None else [],
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )

    return llm_model_func
|
|
|
|
|
def get_vision_model_func(api_key: str, base_url: str = None):
    """Build a vision-capable completion callable bound to OpenAI credentials.

    The returned callable routes requests with image data to "gpt-4o"
    (multimodal) and plain-text requests to "gpt-4o-mini".

    Args:
        api_key: OpenAI API key.
        base_url: Optional custom API endpoint.

    Returns:
        A callable with signature
        ``(prompt, system_prompt=None, history_messages=None,
        image_data=None, **kwargs)``.
    """

    def vision_model_func(
        prompt,
        system_prompt=None,
        history_messages=None,
        image_data=None,
        **kwargs,
    ):
        # None sentinel avoids the shared mutable-default ([]) pitfall.
        history = history_messages if history_messages is not None else []

        if not image_data:
            # No image attached: plain text completion path.
            return openai_complete_if_cache(
                "gpt-4o-mini",
                prompt,
                system_prompt=system_prompt,
                history_messages=history,
                api_key=api_key,
                base_url=base_url,
                **kwargs,
            )

        # Multimodal path: the system prompt (if any) travels inside the
        # explicit `messages` list. Bug fix: the previous code appended a
        # literal None element to `messages` when system_prompt was falsy,
        # which the chat-completions API rejects.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_data}"
                        },
                    },
                ],
            }
        )

        return openai_complete_if_cache(
            "gpt-4o",
            "",
            system_prompt=None,
            history_messages=[],
            messages=messages,
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )

    return vision_model_func
|
|
|
|
|
async def process_image_example(lightrag: LightRAG, vision_model_func):
    """Demonstrate multimodal processing of a single image entry."""

    processor = ImageModalProcessor(
        lightrag=lightrag, modal_caption_func=vision_model_func
    )

    # Minimal image payload: a path plus caption/footnote lists.
    content = {
        "img_path": "image.jpg",
        "img_caption": ["Example image caption"],
        "img_footnote": ["Example image footnote"],
    }

    description, entity_info = await processor.process_multimodal_content(
        modal_content=content,
        content_type="image",
        file_path="image_example.jpg",
        entity_name="Example Image",
    )

    print("Image Processing Results:")
    print(f"Description: {description}")
    print(f"Entity Info: {entity_info}")
|
|
|
|
|
async def process_table_example(lightrag: LightRAG, llm_model_func):
    """Demonstrate multimodal processing of a markdown table."""

    processor = TableModalProcessor(
        lightrag=lightrag, modal_caption_func=llm_model_func
    )

    # Table payload: markdown body plus caption/footnote lists.
    content = {
        "table_body": """
| Name | Age | Occupation |
|------|-----|------------|
| John | 25 | Engineer |
| Mary | 30 | Designer |
""",
        "table_caption": ["Employee Information Table"],
        "table_footnote": ["Data updated as of 2024"],
    }

    description, entity_info = await processor.process_multimodal_content(
        modal_content=content,
        content_type="table",
        file_path="table_example.md",
        entity_name="Employee Table",
    )

    print("\nTable Processing Results:")
    print(f"Description: {description}")
    print(f"Entity Info: {entity_info}")
|
|
|
|
|
async def process_equation_example(lightrag: LightRAG, llm_model_func):
    """Demonstrate multimodal processing of a LaTeX equation."""

    processor = EquationModalProcessor(
        lightrag=lightrag, modal_caption_func=llm_model_func
    )

    # Equation payload: raw text plus its format label.
    content = {"text": "E = mc^2", "text_format": "LaTeX"}

    description, entity_info = await processor.process_multimodal_content(
        modal_content=content,
        content_type="equation",
        file_path="equation_example.txt",
        entity_name="Mass-Energy Equivalence",
    )

    print("\nEquation Processing Results:")
    print(f"Description: {description}")
    print(f"Entity Info: {entity_info}")
|
|
|
|
|
async def initialize_rag(api_key: str, base_url: str = None):
    """Create and initialize a LightRAG instance.

    Sets up the embedding function (text-embedding-3-large, 3072 dims) and
    the LLM completion function, then initializes storages and the shared
    pipeline status.

    Args:
        api_key: OpenAI API key.
        base_url: Optional custom API endpoint.

    Returns:
        A fully initialized ``LightRAG`` instance rooted at ``WORKING_DIR``.
    """
    rag = LightRAG(
        working_dir=WORKING_DIR,
        embedding_func=EmbeddingFunc(
            embedding_dim=3072,
            max_token_size=8192,
            func=lambda texts: openai_embed(
                texts,
                model="text-embedding-3-large",
                api_key=api_key,
                base_url=base_url,
            ),
        ),
        # Reuse the shared factory instead of duplicating the same
        # completion lambda inline (keeps the two definitions in sync).
        llm_model_func=get_llm_model_func(api_key, base_url),
    )

    await rag.initialize_storages()
    await initialize_pipeline_status()

    return rag
|
|
|
|
|
def main(): |
|
"""Main function to run the example""" |
|
parser = argparse.ArgumentParser(description="Modal Processors Example") |
|
parser.add_argument("--api-key", required=True, help="OpenAI API key") |
|
parser.add_argument("--base-url", help="Optional base URL for API") |
|
parser.add_argument( |
|
"--working-dir", "-w", default=WORKING_DIR, help="Working directory path" |
|
) |
|
|
|
args = parser.parse_args() |
|
|
|
|
|
asyncio.run(main_async(args.api_key, args.base_url)) |
|
|
|
|
|
async def main_async(api_key: str, base_url: str = None):
    """Run all three modal-processor demos against one LightRAG instance."""

    rag = await initialize_rag(api_key, base_url)

    # Vision-capable model for images; plain LLM for tables and equations.
    llm_func = get_llm_model_func(api_key, base_url)
    vision_func = get_vision_model_func(api_key, base_url)

    await process_image_example(rag, vision_func)
    await process_table_example(rag, llm_func)
    await process_equation_example(rag, llm_func)
|
|
|
|
|
# Script entry point: parse CLI arguments and run the examples.
if __name__ == "__main__":
    main()
|
|