Commit 65da904 committed by zrguo (2 parents: 51a5e11, 72cd55c)

Merge pull request #343 from sebastianschramm/ses/fix_language_templating_in_prompts

Files changed (2):
  1. lightrag/operate.py +8 -1
  2. lightrag/prompt.py +3 -3
lightrag/operate.py CHANGED
@@ -59,6 +59,9 @@ async def _handle_entity_relation_summary(
     llm_max_tokens = global_config["llm_model_max_token_size"]
     tiktoken_model_name = global_config["tiktoken_model_name"]
     summary_max_tokens = global_config["entity_summary_to_max_tokens"]
+    language = global_config["addon_params"].get(
+        "language", PROMPTS["DEFAULT_LANGUAGE"]
+    )
 
     tokens = encode_string_by_tiktoken(description, model_name=tiktoken_model_name)
     if len(tokens) < summary_max_tokens:  # No need for summary
@@ -70,6 +73,7 @@ async def _handle_entity_relation_summary(
     context_base = dict(
         entity_name=entity_or_relation_name,
         description_list=use_description.split(GRAPH_FIELD_SEP),
+        language=language,
     )
     use_prompt = prompt_template.format(**context_base)
     logger.debug(f"Trigger summary: {entity_or_relation_name}")
@@ -444,6 +448,9 @@ async def kg_query(
         )
     else:
         examples = "\n".join(PROMPTS["keywords_extraction_examples"])
+    language = global_config["addon_params"].get(
+        "language", PROMPTS["DEFAULT_LANGUAGE"]
+    )
 
     # Set mode
     if query_param.mode not in ["local", "global", "hybrid"]:
@@ -453,7 +460,7 @@ async def kg_query(
     # LLM generate keywords
     use_model_func = global_config["llm_model_func"]
     kw_prompt_temp = PROMPTS["keywords_extraction"]
-    kw_prompt = kw_prompt_temp.format(query=query, examples=examples)
+    kw_prompt = kw_prompt_temp.format(query=query, examples=examples, language=language)
     result = await use_model_func(kw_prompt)
     logger.info("kw_prompt result:")
     print(result)
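
The operate.py changes read an optional output language from global_config["addon_params"], fall back to PROMPTS["DEFAULT_LANGUAGE"], and thread it into both the entity/relation summary prompt and the keyword-extraction prompt. The following is a minimal sketch of how a caller might opt in, assuming the LightRAG constructor forwards addon_params into global_config as elsewhere in this repository; the import paths, the model function, and the "Simplified Chinese" value are illustrative, not part of this PR.

from lightrag import LightRAG
from lightrag.llm import gpt_4o_mini_complete  # illustrative LLM function; module path may differ by version

rag = LightRAG(
    working_dir="./rag_storage",
    llm_model_func=gpt_4o_mini_complete,
    # Read by the new global_config["addon_params"].get("language", ...) calls above;
    # omit the key to keep the PROMPTS["DEFAULT_LANGUAGE"] fallback.
    addon_params={"language": "Simplified Chinese"},
)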
lightrag/prompt.py CHANGED
@@ -33,7 +33,7 @@ Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tupl
 3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
 Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_level_keywords>)
 
-4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+4. Return output in {language} as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
 
 5. When finished, output {completion_delimiter}
 
@@ -131,7 +131,7 @@ Given one or two entities, and a list of descriptions, all related to the same e
 Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
 If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
 Make sure it is written in third person, and include the entity names so we the have full context.
-Use Chinese as output language.
+Use {language} as output language.
 
 #######
 -Data-
@@ -178,7 +178,7 @@ Add sections and commentary to the response as appropriate for the length and fo
 PROMPTS["keywords_extraction"] = """---Role---
 
 You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query.
-Use Chinese as output language.
+Use {language} as output language.
 
 ---Goal---
 
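
With the hard-coded languages replaced by a {language} placeholder, the same str.format call shown in kg_query above fills the templates at query time. Below is a short, standalone sketch of that rendering step; the sample query and the "French" override are illustrative, and the concrete value of PROMPTS["DEFAULT_LANGUAGE"] is defined elsewhere in prompt.py rather than in this diff.

from lightrag.prompt import PROMPTS

addon_params = {"language": "French"}  # use {} to exercise the DEFAULT_LANGUAGE fallback
language = addon_params.get("language", PROMPTS["DEFAULT_LANGUAGE"])

kw_prompt = PROMPTS["keywords_extraction"].format(
    query="How do tariffs affect regional trade?",  # stand-in user query
    examples="\n".join(PROMPTS["keywords_extraction_examples"]),
    language=language,
)
assert "Use French as output language." in kw_prompt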