from datasets import load_dataset
from langchain.docstore.document import Document
# Note: on LangChain >= 0.2 the next two imports move to langchain_community
# (langchain_community.vectorstores.FAISS, langchain_community.embeddings.HuggingFaceEmbeddings)
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
# Load a 10% slice of the FEVER task from the KILT benchmark
dataset = load_dataset("facebook/kilt_tasks", "fever", split="train[:10%]")
# Convert to documents
documents = []
for item in dataset:
    text = item['input']
    documents.append(Document(page_content=text, metadata={"id": item['id']}))
# Embed documents with a sentence-transformers model and index them in FAISS
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents, embeddings)
# Save FAISS index
db.save_local("vectorstore")
print("✅ Saved vectorstore!")
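# --- Usage sketch: reload the saved index and query it. Assumes the same
# embedding model is used at load time; on newer LangChain versions
# load_local may also require allow_dangerous_deserialization=True,
# since the index metadata is stored via pickle. The query string below
# is an illustrative FEVER-style claim, not from the dataset itself. ---
db = FAISS.load_local("vectorstore", embeddings)
results = db.similarity_search("The Eiffel Tower is located in Paris.", k=3)
for doc in results:
    print(doc.metadata["id"], doc.page_content[:100])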