shigureui committed
Commit 6ff3fec · 1 Parent(s): 898699f
Files changed (5)
  1. app.py +65 -18
  2. prepare.py +27 -0
  3. requirements.txt +2 -0
  4. test.py +33 -0
  5. 反孔.json +0 -0
app.py CHANGED
@@ -1,28 +1,75 @@
+from whoosh.fields import TEXT, SchemaClass, ID
+from jieba.analyse import ChineseAnalyzer
+from whoosh.index import create_in
+import json
+import os
+
+analyzer = ChineseAnalyzer()  # jieba-backed analyzer so Chinese text is segmented before indexing
+
+# Fields: 原文 = original text, 注释 = annotation, 批判 = critique, 章节 = chapter
+class ArticleSchema(SchemaClass):
+    index = ID(stored=True)
+    原文 = TEXT(stored=True, analyzer=analyzer)
+    注释 = TEXT(stored=True, analyzer=analyzer)
+    批判 = TEXT(stored=True, analyzer=analyzer)
+    章节 = TEXT(stored=True, analyzer=analyzer)
+
+schema = ArticleSchema()
+os.makedirs("indexdir", exist_ok=True)  # create_in() expects the directory to already exist
+ix = create_in("indexdir", schema, indexname='article_index')
+writer = ix.writer()
+
+with open("反孔.json", encoding="utf-8") as json_file:
+    raw_jsons = json.load(json_file)
+
+# Each top-level item is [chapter title, entry, entry, ...]
+for chapter in raw_jsons:
+    for entry in chapter[1:]:
+        print(entry['index'])
+        writer.add_document(index=entry['index'], 原文=entry['原文'],
+                            注释=entry.get('注释', ""), 批判=entry.get('批判', ""),
+                            章节=chapter[0])
+writer.commit()
+
+# init
 import gradio as gr
-
-def letter_counter(word, letter):
-    """
-    Count the number of occurrences of a letter in a word or text.
-
-    Args:
-        word (str): The input text to search through
-        letter (str): The letter to search for
-
-    Returns:
-        str: A message indicating how many times the letter appears
-    """
-    word = word.lower()
-    letter = letter.lower()
-    count = word.count(letter)
-    return count
+from whoosh.qparser import QueryParser
+from whoosh.index import open_dir
+import re
+from whoosh.query import Term
+
+ix = open_dir("indexdir", indexname='article_index')
+searcher = ix.searcher()
+
+def search(query_info):
+    query = QueryParser("原文", ix.schema).parse(query_info)
+    results = searcher.search(query)
+
+    map_hit = []
+    for hit in results:
+        批判文本 = hit.get("批判", "")
+        # Cross-references inside a critique look like "3·6" (sometimes "3.6")
+        matches = re.findall(r'\d+[·.]\d+', 批判文本)
+
+        map_hit.append(dict(hit))
+        map_hit[-1]['extra'] = []
+        for index_ref in matches:
+            index_ref_normalized = index_ref.replace('.', '·')
+            term_query = Term("index", index_ref_normalized)
+            related_results = searcher.search(term_query)
+            for related_hit in related_results:
+                map_hit[-1]['extra'].append(dict(related_hit))
+
+    return map_hit
+
+def lunyu_search(query):
+    return search(query_info=query)
+
 demo = gr.Interface(
-    fn=letter_counter,
-    inputs=["textbox", "textbox"],
-    outputs="number",
-    title="Letter Counter",
-    description="Enter text and a letter to count how many times the letter appears in the text."
+    fn=lunyu_search,
+    inputs=["textbox"],
+    outputs="json",
+    title="论语批判MCP",  # "Analects Critique MCP"
+    description="输入模糊的论语原文,可以查询到对应的批判内容。"  # "Enter an approximate Analects passage to look up the corresponding critique."
 )
 
 if __name__ == "__main__":
+    res = search("季氏旅于泰山。")
+    print(res)
     demo.launch(mcp_server=True)
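
Since demo.launch(mcp_server=True) exposes lunyu_search both as an MCP tool and over Gradio's regular HTTP API, the deployed Space can be exercised from gradio_client. A minimal sketch, assuming the default /predict endpoint that a single gr.Interface gets; the Space id below is a hypothetical placeholder:

from gradio_client import Client

client = Client("shigureui/lunyu-critique")  # hypothetical Space id; substitute the real one
result = client.predict("季氏旅于泰山。", api_name="/predict")  # default endpoint for an Interface
print(result)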
prepare.py ADDED
@@ -0,0 +1,27 @@
+from whoosh.fields import TEXT, SchemaClass, ID
+from jieba.analyse import ChineseAnalyzer
+from whoosh.index import create_in
+import json
+import os
+
+analyzer = ChineseAnalyzer()  # jieba-backed analyzer for Chinese tokenization
+
+class ArticleSchema(SchemaClass):
+    index = ID(stored=True)
+    原文 = TEXT(stored=True, analyzer=analyzer)
+    注释 = TEXT(stored=True, analyzer=analyzer)
+    批判 = TEXT(stored=True, analyzer=analyzer)
+    章节 = TEXT(stored=True, analyzer=analyzer)
+
+schema = ArticleSchema()
+os.makedirs("indexdir", exist_ok=True)  # create_in() expects the directory to already exist
+ix = create_in("indexdir", schema, indexname='article_index')
+writer = ix.writer()
+
+with open("反孔.json", encoding="utf-8") as json_file:
+    raw_jsons = json.load(json_file)
+
+for chapter in raw_jsons:
+    for entry in chapter[1:]:
+        print(entry['index'])
+        writer.add_document(index=entry['index'], 原文=entry['原文'],
+                            注释=entry.get('注释', ""), 批判=entry.get('批判', ""),
+                            章节=chapter[0])
+writer.commit()
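
Once prepare.py has built indexdir, the index can be sanity-checked without launching the app. A minimal sketch using standard Whoosh calls; the index value "3·6" is a hypothetical example:

from whoosh.index import open_dir
from whoosh.query import Term

ix = open_dir("indexdir", indexname='article_index')
print("documents indexed:", ix.doc_count())  # should equal the number of entries in 反孔.json

with ix.searcher() as searcher:
    for hit in searcher.search(Term("index", "3·6")):  # exact lookup on the ID field
        print(hit["章节"], hit["原文"])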
requirements.txt ADDED
@@ -0,0 +1,2 @@
+whoosh-reloaded==2.7.5
+jieba==0.42.1
test.py ADDED
@@ -0,0 +1,33 @@
+from whoosh.qparser import QueryParser
+from whoosh.index import open_dir
+import re
+from whoosh.query import Term
+
+
+ix = open_dir("indexdir", indexname='article_index')
+with ix.searcher() as searcher:
+    query = QueryParser("原文", ix.schema).parse("季氏旅于泰山。")
+    results = searcher.search(query)
+
+    if results:
+        hit = results[0]
+        print("原始命中记录:", hit)  # "original hit record"
+
+        批判文本 = hit.get("批判", "")
+        matches = re.findall(r'\d+[·.]\d+', 批判文本)
+
+        print("匹配到的 index 值:", matches)  # "matched index values"
+
+        map_hit = dict(hit)
+        map_hit['extra'] = []
+        for index_ref in matches:
+            index_ref_normalized = index_ref.replace('.', '·')
+
+            term_query = Term("index", index_ref_normalized)
+            related_results = searcher.search(term_query)
+
+            for related_hit in related_results:
+                print(f"\n关联 index {index_ref_normalized} 的记录:")  # "records related to index ..."
+                map_hit['extra'].append(dict(related_hit))
+
+        print(map_hit)
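
The cross-reference step in test.py mirrors search() in app.py: references such as "3·6" are extracted from the critique text, and a "." separator is normalized to the "·" used by the index ID field. A self-contained illustration with a made-up critique snippet:

import re

批判文本 = "此章与3.6、16·1所记相类。"  # hypothetical snippet ("this passage resembles 3.6 and 16·1")
matches = re.findall(r'\d+[·.]\d+', 批判文本)
print(matches)                                 # ['3.6', '16·1']
print([m.replace('.', '·') for m in matches])  # ['3·6', '16·1'], the format of the ID field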
反孔.json ADDED
The diff for this file is too large to render. See raw diff
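
Although the diff for 反孔.json is too large to render, the indexing loop in app.py and prepare.py pins down the shape the file must have: a list of chapter blocks, each a list whose first element is the chapter title (章节) and whose remaining elements are entry dicts. A minimal hypothetical sample; the keys and nesting come from the code, every value is a placeholder:

raw_jsons = [
    [
        "八佾篇",  # chapter[0]: chapter title, stored as 章节 (placeholder)
        {
            "index": "3·6",             # ID field; note the '·' separator
            "原文": "季氏旅于泰山。……",   # original passage (elided)
            "注释": "……",               # optional annotation
            "批判": "……参见3·1。",       # optional critique; may cite other entries by index
        },
    ],
]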