Lunyu-Critique-MCP / prepare.py
shigureui's picture
init
6ff3fec
import json
import os

from jieba.analyse import ChineseAnalyzer
from whoosh.fields import TEXT, SchemaClass, ID
from whoosh.index import create_in
# A single jieba ChineseAnalyzer instance shared by every TEXT field below.
analyzer = ChineseAnalyzer()


class ArticleSchema(SchemaClass):
    """Whoosh schema describing one indexed entry.

    Every field is stored, so its value can be read back from search hits.
    The four TEXT fields are tokenized with the shared jieba analyzer.
    Only dunder names and field objects may live in this class body.
    """

    # Identifier of the entry, taken from the entry's 'index' key.
    index = ID(stored=True)
    # 原文: the original text of the entry.
    原文 = TEXT(analyzer=analyzer, stored=True)
    # 注释: annotation text (empty string when the entry has none).
    注释 = TEXT(analyzer=analyzer, stored=True)
    # 批判: critique text (empty string when the entry has none).
    批判 = TEXT(analyzer=analyzer, stored=True)
    # 章节: the chapter value the entry was listed under.
    章节 = TEXT(analyzer=analyzer, stored=True)
# Build the on-disk Whoosh index "article_index" from 反孔.json.
schema = ArticleSchema()

# create_in() writes into an existing directory and does not create it, so
# make sure the target directory exists before the index is created.
os.makedirs("indexdir", exist_ok=True)
ix = create_in("indexdir", schema, indexname='article_index')

with open("反孔.json", encoding="utf-8") as json_file:
    raw_jsons = json.load(json_file)

# As a context manager the writer commits when the block completes and
# cancels (releasing the index write lock) if an exception is raised.
with ix.writer() as writer:
    # Each top-level item is a sequence: element 0 is used as the 章节 value
    # and every following element is a dict describing one entry.
    for chapter in raw_jsons:
        chapter_title = chapter[0]
        for entry in chapter[1:]:
            # Progress output: echo the identifier of the entry being indexed.
            print(entry['index'])
            writer.add_document(
                index=entry['index'],
                原文=entry['原文'],
                # 注释 and 批判 are optional keys; index "" when absent.
                注释=entry.get("注释", ""),
                批判=entry.get("批判", ""),
                章节=chapter_title,
            )