shigureui committed on
Commit
8c639d1
·
1 Parent(s): 6cab0b1
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. app.py +7 -3
  3. embeddings.json +3 -0
.gitattributes CHANGED
@@ -32,4 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *.json filter=lfs diff=lfs merge=lfs -text
36
  *tfevents* filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -21,8 +21,8 @@ def decrypt_file(input_path, key):
21
 
22
  llm = llama_cpp.Llama.from_pretrained(repo_id="mradermacher/bge-large-zh-v1.5-GGUF", filename="bge-large-zh-v1.5.Q4_K_M.gguf", embedding=True)
23
 
24
- embedding_1 = llm.create_embedding("Hello, world!")
25
- embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
26
 
27
  from pymilvus import MilvusClient
28
  client = MilvusClient("./books.db")
@@ -40,6 +40,10 @@ raw_jsons = json.loads(decrypted_content)
40
  docs = []
41
  metas = []
42
 
 
 
 
 
43
  for vhjx_index, vhjx_item in enumerate(raw_jsons):
44
  chapter = vhjx_item[0]
45
  for jvvi_item in vhjx_item[1:]:
@@ -56,7 +60,7 @@ for vhjx_index, vhjx_item in enumerate(raw_jsons):
56
  # 一个章节一次
57
  # 批量生成 embeddings(每个为 list[float])
58
  emb_result = llm.create_embedding(docs)
59
- embeddings = [item["embedding"] for item in emb_result["data"]] # List[List[float]]
60
 
61
  # 准备数据
62
  milvus_data = []
 
21
 
22
  llm = llama_cpp.Llama.from_pretrained(repo_id="mradermacher/bge-large-zh-v1.5-GGUF", filename="bge-large-zh-v1.5.Q4_K_M.gguf", embedding=True)
23
 
24
+ # embedding_1 = llm.create_embedding("Hello, world!")
25
+ # embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
26
 
27
  from pymilvus import MilvusClient
28
  client = MilvusClient("./books.db")
 
40
  docs = []
41
  metas = []
42
 
43
+ with open('embeddings.json', mode='w+') as embedding_file:
44
+ all_embs = json.load(embedding_file)
45
+
46
+
47
  for vhjx_index, vhjx_item in enumerate(raw_jsons):
48
  chapter = vhjx_item[0]
49
  for jvvi_item in vhjx_item[1:]:
 
60
  # 一个章节一次
61
  # 批量生成 embeddings(每个为 list[float])
62
  emb_result = llm.create_embedding(docs)
63
+ embeddings = all_embs[vhjx_index] # List[List[float]]
64
 
65
  # 准备数据
66
  milvus_data = []
embeddings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bfd6c4db5126d998144279518e6f0d134c7c84cbe07d5a8531711a1ec949602
3
+ size 119355981