import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

model_name = "Meldashti/chatbot"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the base model, then apply the fine-tuned PEFT adapter on top of it
model = AutoModelForCausalLM.from_pretrained(model_name)
model = PeftModel.from_pretrained(model, model_name)

# Example: assume you have a collection of text documents
documents = [
    Document(page_content="Document 1 content goes here..."),
    Document(page_content="Document 2 content goes here..."),
    # Add more documents as needed
]

# Initialize the Hugging Face embeddings model
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Split documents into smaller, overlapping chunks for better retrieval
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
split_documents = text_splitter.split_documents(documents)

# Create the FAISS vector store and expose it as a retriever
vector_store = FAISS.from_documents(split_documents, embeddings)
retriever = vector_store.as_retriever()

# Define the prompt template for the RAG pipeline
prompt_template = """
You are a helpful assistant. Answer the question based on the context
retrieved from the knowledge base.

Here is the context:
{context}

Question: {question}

Answer:"""

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# Wrap the fine-tuned model in a transformers text-generation pipeline so
# LangChain can drive it through HuggingFacePipeline
generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
)
llm = HuggingFacePipeline(pipeline=generation_pipeline)

# Set up the RAG chain: the retriever fetches relevant chunks, the "stuff"
# chain packs them into the prompt, and the LLM generates the answer
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)


def chat(message, history):
    # Call the chain with a dict: with return_source_documents=True the
    # chain has multiple outputs, so .run() would raise a ValueError
    result = rag_chain({"query": message})
    return str(result["result"])


demo = gr.ChatInterface(chat, type="messages", autofocus=False)

if __name__ == "__main__":
    demo.launch()
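
# --- Optional: persist the FAISS index (hedged sketch) ----------------------
# Rebuilding the embeddings and index on every launch is wasteful once the
# document set is stable. LangChain's FAISS wrapper can save the index to
# disk and reload it later; the "faiss_index" folder name below is an
# arbitrary assumption, not part of the original script.
#
#   vector_store.save_local("faiss_index")                      # after the first build
#   vector_store = FAISS.load_local("faiss_index", embeddings)  # on later runs
#   retriever = vector_store.as_retriever()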