Spaces:

shigureui
/

Lunyu-Critique-MCP

Sleeping

App Files Files Community

shigureui committed on May 6

Commit

6ff3fec

1 Parent(s): 898699f

init

Browse files

Files changed (5) hide show

app.py +65 -18
prepare.py +27 -0
requirements.txt +2 -0
test.py +33 -0
反孔.json +0 -0

app.py CHANGED Viewed

@@ -1,28 +1,75 @@
 import gradio as gr
-def letter_counter(word, letter):
-    """
-    Count the number of occurrences of a letter in a word or text.
-    Args:
-        word (str): The input text to search through
-        letter (str): The letter to search for
-    Returns:
-        str: A message indicating how many times the letter appears
-    """
-    word = word.lower()
-    letter = letter.lower()
-    count = word.count(letter)
-    return count
 demo = gr.Interface(
-    fn=letter_counter,
-    inputs=["textbox", "textbox"],
-    outputs="number",
-    title="Letter Counter",
-    description="Enter text and a letter to count how many times the letter appears in the text."
 )
 if __name__ == "__main__":
     demo.launch(mcp_server=True)

+from whoosh.fields import TEXT, SchemaClass, ID
+from jieba.analyse import ChineseAnalyzer
+from whoosh.index import create_in
+import json
+import os
+# Build the full-text index over 反孔.json.
+analyzer = ChineseAnalyzer()
+class ArticleSchema(SchemaClass):
+    """Whoosh schema for one indexed passage; every field is stored."""
+    # Passage identifier, taken from the record's 'index' key.
+    index = ID(stored=True)
+    # Original text, annotations, critique and chapter, all tokenised with
+    # the jieba Chinese analyzer.
+    原文 = TEXT(stored=True, analyzer=analyzer)
+    注释 = TEXT(stored=True, analyzer=analyzer)
+    批判 = TEXT(stored=True, analyzer=analyzer)
+    章节 = TEXT(stored=True, analyzer=analyzer)
+schema = ArticleSchema()
+# create_in() needs the target directory to exist already.
+os.makedirs("indexdir", exist_ok=True)
+ix = create_in("indexdir", schema, indexname='article_index')
+with open("反孔.json", encoding="utf-8") as json_file:
+    raw_jsons = json.load(json_file)
+# Using the writer as a context manager commits on success and cancels on
+# error, so a malformed record does not leave the index write-locked.
+with ix.writer() as writer:
+    # Each top-level item is a sequence: element 0 is the chapter value,
+    # the remaining elements are the passage records of that chapter.
+    for vhjx_item in raw_jsons:
+        for jvvi_item in vhjx_item[1:]:
+            print(jvvi_item['index'])
+            writer.add_document(
+                index=jvvi_item['index'],
+                原文=jvvi_item['原文'],
+                # Annotation and critique are optional in the source data.
+                注释=jvvi_item.get('注释', ""),
+                批判=jvvi_item.get('批判', ""),
+                章节=vhjx_item[0],
+            )
+# init
 import gradio as gr
+from whoosh.qparser import QueryParser
+from whoosh.index import open_dir
+import re
+from whoosh.query import Term
+# Open the on-disk index once at import time; search() below reuses this
+# module-level searcher for every query.
+ix = open_dir("indexdir", indexname='article_index')
+searcher = ix.searcher()
+def search(query_info):
+    """Search the "原文" field and attach the passages each hit's critique cites.
+
+    Args:
+        query_info: Query text, parsed by Whoosh's QueryParser against the
+            "原文" field of the index schema.
+
+    Returns:
+        A list with one dict of stored fields per hit. Each dict also has
+        an "extra" key: a list of the stored fields of every record whose
+        "index" equals a reference (digits, '·' or '.', digits) found in
+        the hit's "批判" text. Each distinct reference is looked up once.
+    """
+    query = QueryParser("原文", ix.schema).parse(query_info)
+    results = searcher.search(query)
+    map_hit = []
+    for hit in results:
+        批判文本 = hit.get("批判", "")
+        matches = re.findall(r'\d+[\·\.]\d+', 批判文本)
+        # References are normalised to the '·' separator before the exact
+        # lookup. dict.fromkeys drops repeated references while keeping
+        # first-seen order, so a passage cited twice is not duplicated.
+        unique_refs = dict.fromkeys(
+            index_ref.replace('.', '·') for index_ref in matches
+        )
+        record = dict(hit)
+        record['extra'] = []
+        for index_ref_normalized in unique_refs:
+            term_query = Term("index", index_ref_normalized)
+            for related_hit in searcher.search(term_query):
+                record['extra'].append(dict(related_hit))
+        map_hit.append(record)
+    return map_hit
+def lunyu_search(query: str) -> list:
+    """
+    Search the Analects (论语) index and return matching passages with their critiques.
+
+    Args:
+        query (str): Text of the original passage to look for; it is matched
+            against the "原文" field.
+
+    Returns:
+        list: One dict per matching passage, as returned by search(),
+            including an "extra" list of passages cited by its critique.
+    """
+    return search(query_info=query)
 demo = gr.Interface(
+    fn=lunyu_search,
+    # String shortcuts must name real Gradio components: a textbox for the
+    # query, and a JSON viewer for the list of dicts the search returns.
+    inputs=["textbox"],
+    outputs="json",
+    title="论语批判MCP",
+    description="输入模糊的论语原文，可以查询到对应的批判内容。"
 )
 if __name__ == "__main__":
+    # Run one sample query and print its result before starting the server.
+    res = search("季氏旅于泰山。")
+    print(res)
+    # Launch the Gradio app with its MCP server option enabled.
     demo.launch(mcp_server=True)

prepare.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from whoosh.fields import TEXT, SchemaClass, ID
+from jieba.analyse import ChineseAnalyzer
+from whoosh.index import create_in
+import json
+import os
+# Build the full-text index over 反孔.json.
+analyzer = ChineseAnalyzer()
+class ArticleSchema(SchemaClass):
+    """Whoosh schema for one indexed passage; every field is stored."""
+    # Passage identifier, taken from the record's 'index' key.
+    index = ID(stored=True)
+    # Original text, annotations, critique and chapter, all tokenised with
+    # the jieba Chinese analyzer.
+    原文 = TEXT(stored=True, analyzer=analyzer)
+    注释 = TEXT(stored=True, analyzer=analyzer)
+    批判 = TEXT(stored=True, analyzer=analyzer)
+    章节 = TEXT(stored=True, analyzer=analyzer)
+schema = ArticleSchema()
+# create_in() needs the target directory to exist already.
+os.makedirs("indexdir", exist_ok=True)
+ix = create_in("indexdir", schema, indexname='article_index')
+with open("反孔.json", encoding="utf-8") as json_file:
+    raw_jsons = json.load(json_file)
+# Using the writer as a context manager commits on success and cancels on
+# error, so a malformed record does not leave the index write-locked.
+with ix.writer() as writer:
+    # Each top-level item is a sequence: element 0 is the chapter value,
+    # the remaining elements are the passage records of that chapter.
+    for vhjx_item in raw_jsons:
+        for jvvi_item in vhjx_item[1:]:
+            print(jvvi_item['index'])
+            writer.add_document(
+                index=jvvi_item['index'],
+                原文=jvvi_item['原文'],
+                # Annotation and critique are optional in the source data.
+                注释=jvvi_item.get('注释', ""),
+                批判=jvvi_item.get('批判', ""),
+                章节=vhjx_item[0],
+            )

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ whoosh-reloaded==2.7.5
2	+ jieba==0.42.1

test.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from whoosh.qparser import QueryParser
+from whoosh.index import open_dir
+import re
+from whoosh.query import Term
+# Manual check: run one sample query against the existing index, then look
+# up every passage reference found in the top hit's critique text.
+ix = open_dir("indexdir", indexname='article_index')
+with ix.searcher() as searcher:
+    parser = QueryParser("原文", ix.schema)
+    results = searcher.search(parser.parse("季氏旅于泰山。"))
+    if results:
+        top_hit = results[0]
+        print("原始命中记录：", top_hit)
+        critique_text = top_hit.get("批判", "")
+        matches = re.findall(r'\d+[\·\.]\d+', critique_text)
+        print("匹配到的 index 值：", matches)
+        map_hit = dict(top_hit)
+        map_hit['extra'] = []
+        for raw_ref in matches:
+            # References are normalised to the '·' separator before the
+            # exact-match lookup on the "index" field.
+            ref = raw_ref.replace('.', '·')
+            for related_hit in searcher.search(Term("index", ref)):
+                print(f"\n关联 index {ref} 的记录：")
+                map_hit['extra'].append(dict(related_hit))
+        print(map_hit)

反孔.json ADDED Viewed

The diff for this file is too large to render. See raw diff