import os import gradio as gr import spaces from langchain.chains.combine_documents import create_stuff_documents_chain from langchain.chains.retrieval import create_retrieval_chain from langchain_core.messages import HumanMessage, AIMessage from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate from langchain_huggingface import HuggingFacePipeline from langchain_huggingface.embeddings import HuggingFaceEmbeddings from langchain_pinecone import PineconeVectorStore from peft import PeftModel from pinecone import Pinecone as PC from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # Initialize Pinecone PINECONE_API_KEY = os.getenv("PINECONE_API_KEY") PINECONE_INDEX = "arolchatbot" # e.g., "us-west1-gcp-free" pc = PC(api_key=PINECONE_API_KEY) index = pc.Index(PINECONE_INDEX) # Connect to Pinecone embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-large") vector_store = PineconeVectorStore(index, embeddings, "content") # Model and Tokenizer model_name = "Meldashti/chatbot" base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B") tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B") # Merge PEFT weights with base model model = PeftModel.from_pretrained(base_model, model_name) model = model.merge_and_unload() # Simplified pipeline with minimal parameters generator = pipeline( "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.7, ) # LLM wrapper llm = HuggingFacePipeline(pipeline=generator) retriever = vector_store.as_retriever(search_kwargs={"k": 5}) prompt = """ Context: {context} Question: {input} """ # prompt = hub.pull("rlm/rag-prompt") prompt_template = ChatPromptTemplate.from_messages( [ HumanMessage("""" You are an assistant for AROL company. You need to be professional and helpful in your responses to users. Your tone should be formal and respectful. If you are unsure about the answer, you can kindly answer them with "I am not sure about that, please contact our support team for more information." Please provide the most accurate and helpful response to the user's question. Do not provide any personal information or any information that is not related to the question. Do not include anything else but the answer to user question or request if they are related to AROL company and its services. Given the following context, answer the question as accurately as possible. Do not repeat the context or the question in the response. Start the response directly. The response should contain only the answer to the question. For example: Q: Hi, Who are you? A: Hello there, I am an assistant for AROL company. """), HumanMessage( "Context: AROL company is a software company that provides software solutions for businesses., Question: Hi, Who are you?"), AIMessage( "Hello there, I am an customer care assistant for AROL company"), ('human', prompt), ] ) # Retrieval QA Chain qa_chain = create_stuff_documents_chain(llm=llm, prompt=prompt_template) rag_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=qa_chain) @spaces.GPU def chat(message, history): # Chat function with extensive logging print(f"Received message: {message}") try: response = rag_chain.invoke({"input": message}) print(response) if isinstance(response, str): # Clean up the output to remove unnecessary text response = response.split("Answer: ")[1].split("