from datasets import load_dataset
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Load the first 10% of the FEVER training split from the KILT benchmark.
dataset = load_dataset("facebook/kilt_tasks", "fever", split="train[:10%]")

# Wrap each FEVER claim in a LangChain Document, keeping the KILT id as metadata.
documents = []
for item in dataset:
    text = item['input']
    documents.append(Document(page_content=text, metadata={"id": item['id']}))

# Embed the documents with a small sentence-transformers model and index them in FAISS.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents, embeddings)

# Persist the index to disk so it can be reloaded later without re-embedding.
db.save_local("vectorstore")
print("✅ Saved vectorstore!")
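
# A minimal sketch (not part of the original script) showing how the saved index
# could be reloaded and queried. Assumptions: the variable names below are
# illustrative, the query is just an example FEVER-style claim, and newer
# LangChain releases may also require allow_dangerous_deserialization=True
# when calling load_local.
reloaded_db = FAISS.load_local("vectorstore", embeddings)
results = reloaded_db.similarity_search("Hamlet was written by William Shakespeare.", k=3)
for doc in results:
    print(doc.metadata["id"], doc.page_content)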