mohammadhakimi committed
Commit 6249edf · verified · Parent(s): 821d5cd

Update app.py

Files changed (1):
  app.py +21 -10
app.py CHANGED
@@ -7,7 +7,24 @@ from langchain.docstore.document import Document
 from langchain.chains import RetrievalQA
 from langchain_huggingface import HuggingFacePipeline
 from langchain_huggingface.embeddings import HuggingFaceEmbeddings
+from sentence_transformers import SentenceTransformer
+import os
+import pinecone
+import numpy as np
+from langchain.vectorstores import Pinecone
+from langchain.schema import Document
 
+# Initialize Pinecone
+PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
+PINECONE_INDEX = "arolchatbot"
+
+# Connect to Pinecone
+pinecone.init(api_key=PINECONE_API_KEY)
+
+index = pinecone.Index(PINECONE_INDEX)
+embedder = SentenceTransformer('thenlper/gte-large')
+vector_store = Pinecone(index, embedder.embed_query, "text")
+embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-large")
 # Model and Tokenizer
 model_name = "Meldashti/chatbot"
 base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B")
@@ -28,22 +45,16 @@ generator = pipeline(
 # LLM wrapper
 llm = HuggingFacePipeline(pipeline=generator)
 
-# Embeddings
-embeddings = HuggingFaceEmbeddings(model_name="paraphrase-MiniLM-L3-v2")
-
-# Sample documents (minimal)
-documents = [
-    Document(page_content="Example document about food industry caps"),
-    Document(page_content="Information about manufacturing processes")
-]
+# Wrap the Pinecone index with LangChain's Pinecone wrapper
+vector_store = Pinecone(index, embeddings.embed_query, "text")
 
 # Text splitting
-text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=20)
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
 split_documents = text_splitter.split_documents(documents)
 
 # Vector store
 vector_store = FAISS.from_documents(split_documents, embeddings)
-retriever = vector_store.as_retriever(search_kwargs={"k": 2})
+retriever = vector_store.as_retriever(search_kwargs={"k": 5})
 
 # Retrieval QA Chain
 rag_chain = RetrievalQA.from_chain_type(
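
For context, here is a minimal end-to-end sketch of the retrieval setup this commit moves toward, using the pinecone-client v2 calls and the gte-large embedder that appear in the diff. The placeholder documents, the "stuff" chain type, and the generation settings are assumptions for illustration, and loading the fine-tuned Meldashti/chatbot weights on top of the base model is omitted; this is not the exact contents of app.py.

import os
import pinecone
from langchain.vectorstores import Pinecone
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Shared embedding model for indexing and querying (assumed: thenlper/gte-large)
embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-large")

# Pinecone index created beforehand, with a dimension matching the embedder
pinecone.init(api_key=os.environ["PINECONE_API_KEY"])
index = pinecone.Index("arolchatbot")
vector_store = Pinecone(index, embeddings.embed_query, "text")

# Placeholder corpus; the real app would index its own documents here
documents = [Document(page_content="Information about manufacturing processes")]
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
vector_store.add_documents(splitter.split_documents(documents))

retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Generator LLM wrapped for LangChain (fine-tuned adapter loading omitted)
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B")
model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256)
llm = HuggingFacePipeline(pipeline=generator)

# Retrieval QA chain over the Pinecone-backed retriever
rag_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
print(rag_chain.invoke({"query": "What manufacturing processes are covered?"}))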