seanpedrickcase committed
Commit 03afd76 · 1 Parent(s): ee7464e

Refocused the app on working with the Document Redaction guide. Added Gemma 3 1B as the base model. Various UI and config improvements.

.dockerignore CHANGED
@@ -9,4 +9,6 @@ bootstrapper.py
  build/*
  dist/*
  test/*
- config/*
+ config/*
+ output/*
+ input/*
.gitattributes ADDED
@@ -0,0 +1 @@
+ *.zip filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -9,4 +9,6 @@ bootstrapper.py
  build/*
  dist/*
  test/*
- config/*
+ config/*
+ output/*
+ input/*
app.py CHANGED
@@ -4,18 +4,21 @@ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
  import gradio as gr
  import pandas as pd
+ from torch import float16
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
+ import zipfile

  from chatfuncs.ingest import embed_faiss_save_to_zip

- from chatfuncs.helper_functions import ensure_output_folder_exists, get_connection_params, output_folder, reveal_feedback_buttons, wipe_logs
+ from chatfuncs.helper_functions import get_connection_params, reveal_feedback_buttons, wipe_logs
  from chatfuncs.aws_functions import upload_file_to_s3
  from chatfuncs.auth import authenticate_user
- from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH
-
- from llama_cpp import Llama
- from huggingface_hub import hf_hub_download
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
- import os
+ from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES
+ from chatfuncs.model_load import torch_device, gpu_config, cpu_config, context_length
+ import chatfuncs.chatfuncs as chatf
+ import chatfuncs.ingest as ing

  PandasDataFrame = Type[pd.DataFrame]

@@ -27,83 +30,93 @@ access_logs_data_folder = ACCESS_LOGS_FOLDER
  feedback_data_folder = FEEDBACK_LOGS_FOLDER
  usage_data_folder = USAGE_LOGS_FOLDER

+ if isinstance(DEFAULT_EXAMPLES, str): default_examples_set = eval(DEFAULT_EXAMPLES)
+ if isinstance(DEFAULT_MODEL_CHOICES, str): default_model_choices = eval(DEFAULT_MODEL_CHOICES)
+
  # Disable cuda devices if necessary
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

- import chatfuncs.ingest as ing

  ###
  # Load preset embeddings, vectorstore, and model
  ###

- embeddings_name = "BAAI/bge-base-en-v1.5" #"mixedbread-ai/mxbai-embed-xsmall-v1"
+ def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):

- def load_embeddings(embeddings_name = embeddings_name):
+ embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)

- embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_name)
+ #global embeddings

- global embeddings
+ #embeddings = embeddings_func

- embeddings = embeddings_func
+ return embeddings_func

- return embeddings
+ def get_faiss_store(faiss_vstore_folder:str, embeddings_model:object):

- def get_faiss_store(faiss_vstore_folder,embeddings):
- import zipfile
  with zipfile.ZipFile(faiss_vstore_folder + '/' + faiss_vstore_folder + '.zip', 'r') as zip_ref:
  zip_ref.extractall(faiss_vstore_folder)

- faiss_vstore = FAISS.load_local(folder_path=faiss_vstore_folder, embeddings=embeddings, allow_dangerous_deserialization=True)
+ faiss_vstore = FAISS.load_local(folder_path=faiss_vstore_folder, embeddings=embeddings_model, allow_dangerous_deserialization=True)
  os.remove(faiss_vstore_folder + "/index.faiss")
  os.remove(faiss_vstore_folder + "/index.pkl")

- global vectorstore
+ #global vectorstore

- vectorstore = faiss_vstore
+ #vectorstore = faiss_vstore

- return vectorstore
+ return faiss_vstore #vectorstore

- import chatfuncs.chatfuncs as chatf
- from chatfuncs.model_load import torch_device, gpu_config, cpu_config, context_length
+ # Load in default embeddings and embeddings model name
+ embeddings_model = load_embeddings_model(EMBEDDINGS_MODEL_NAME)
+ vectorstore = get_faiss_store(faiss_vstore_folder=DEFAULT_EMBEDDINGS_LOCATION,embeddings_model=embeddings_model)#globals()["embeddings"])

- chatf.embeddings = load_embeddings(embeddings_name)
- chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
+ chatf.embeddings = embeddings_model
+ chatf.vectorstore = vectorstore

- def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
+ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings_model=embeddings_model):

  print(f"> Total split documents: {len(docs_out)}")

  print(docs_out)

- vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
+ vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings_model)

  chatf.vectorstore = vectorstore_func

  out_message = "Document processing complete"

  return out_message, vectorstore_func
- # Gradio chat
+

- def create_hf_model(model_name:str):
+ def create_hf_model(model_name:str, hf_token=HF_TOKEN):
  if torch_device == "cuda":
  if "flan" in model_name:
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")#, torch_dtype=torch.float16)
  else:
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")#, torch_dtype=torch.float16)
+ if hf_token:
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", token=hf_token) # , torch_dtype=float16
+ else:
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto") # , torch_dtype=float16
  else:
  if "flan" in model_name:
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)#, torch_dtype=torch.float16)
- else:
- model = AutoModelForCausalLM.from_pretrained(model_name)#, trust_remote_code=True)#, torch_dtype=torch.float16)
+ else:
+ if hf_token:
+ model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token) # , torch_dtype=float16
+ else:
+ model = AutoModelForCausalLM.from_pretrained(model_name) # , torch_dtype=float16

- tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = context_length)
+ if hf_token:
+ tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = context_length, token=hf_token)
+ else:
+ tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = context_length)

  return model, tokenizer

  def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_config:dict=cpu_config, torch_device:str=torch_device):
  print("Loading model")

- if model_type == "Phi 3.5 Mini (larger, slow)":
+ if model_type == LARGE_MODEL_NAME:
  if torch_device == "cuda":
  gpu_config.update_gpu(gpu_layers)
  print("Loading with", gpu_config.n_gpu_layers, "model layers sent to GPU.")
@@ -113,33 +126,30 @@ def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_c

  print("Loading with", cpu_config.n_gpu_layers, "model layers sent to GPU.")

- print(vars(gpu_config))
- print(vars(cpu_config))
-
  try:
  model = Llama(
  model_path=hf_hub_download(
- repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3.5-mini-instruct-GGUF"),# "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
- filename=os.environ.get("MODEL_FILE", "Phi-3.5-mini-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
+ repo_id=LARGE_MODEL_REPO_ID,
+ filename=LARGE_MODEL_GGUF_FILE
  ),
  **vars(gpu_config) # change n_gpu_layers if you have more or less VRAM
  )

  except Exception as e:
- print("GPU load failed", e)
+ print("GPU load failed", e, "loading CPU version instead")
  model = Llama(
  model_path=hf_hub_download(
- repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3.5-mini-instruct-GGUF"), #"QuantFactory/Phi-3-mini-128k-instruct-GGUF"), #, "microsoft/Phi-3-mini-4k-instruct-gguf"),#"QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
- filename=os.environ.get("MODEL_FILE", "Phi-3.5-mini-instruct.Q4_K_M.gguf"), # "Phi-3-mini-128k-instruct.Q4_K_M.gguf") # , #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf"),#"mistral-7b-openorca.Q4_K_M.gguf"),
+ repo_id=LARGE_MODEL_REPO_ID,
+ filename=LARGE_MODEL_GGUF_FILE
  ),
  **vars(cpu_config)
  )

  tokenizer = []

- if model_type == "Qwen 2 0.5B (small, fast)":
+ if model_type == SMALL_MODEL_NAME:
  # Huggingface chat model
- hf_checkpoint = 'Qwen/Qwen2-0.5B-Instruct'# 'declare-lab/flan-alpaca-large'#'declare-lab/flan-alpaca-base' # # # 'Qwen/Qwen1.5-0.5B-Chat' #
+ hf_checkpoint = SMALL_MODEL_REPO_ID# 'declare-lab/flan-alpaca-large'#'declare-lab/flan-alpaca-base' # # # 'Qwen/Qwen1.5-0.5B-Chat' #

  model, tokenizer = create_hf_model(model_name = hf_checkpoint)

@@ -165,11 +175,9 @@ def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_c
  app = gr.Blocks(theme = gr.themes.Base(), fill_width=True)#css=".gradio-container {background-color: black}")

  with app:
- model_type = "Qwen 2 0.5B (small, fast)"
+ model_type = SMALL_MODEL_NAME
  load_model(model_type, 0, gpu_config, cpu_config, torch_device) # chatf.model_object, chatf.tokenizer, chatf.model_type =

- print("chatf.model_object:", chatf.model_object)
-
  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
  #model_type = "Phi 3.5 Mini (larger, slow)"
  #load_model(model_type, gpu_layers, gpu_config, cpu_config, torch_device)
@@ -182,11 +190,16 @@ with app:
  gpu_config_state = gr.State(gpu_config)
  cpu_config_state = gr.State(cpu_config)
  torch_device_state = gr.State(torch_device)
- embeddings_state = gr.State(chatf.embeddings)#globals()["embeddings"])
- vectorstore_state = gr.State(chatf.vectorstore)#globals()["vectorstore"])

+ # Embeddings related vars
+ embeddings_model_object_state = gr.State(embeddings_model)#globals()["embeddings"])
+ vectorstore_state = gr.State(vectorstore)#globals()["vectorstore"])
+ default_embeddings_store_text = gr.Textbox(value=DEFAULT_EMBEDDINGS_LOCATION, visible=False)
+
+ # Is the query relevant to the sources provided?
  relevant_query_state = gr.Checkbox(value=True, visible=False)

+ # Storing model objects in state doesn't seem to work, so we have to load in different models in roundabout ways
  model_state = gr.State() # chatf.model_object (gives error)
  tokenizer_state = gr.State() # chatf.tokenizer (gives error)

@@ -194,7 +207,8 @@ with app:
  instruction_prompt_out = gr.State()

  session_hash_state = gr.State()
- s3_output_folder_state = gr.State()
+ output_folder_textbox = gr.Textbox(value=OUTPUT_FOLDER, visible=False)
+ input_folder_textbox = gr.Textbox(value=INPUT_FOLDER, visible=False)

  session_hash_textbox = gr.Textbox(value="", visible=False)
  s3_logs_output_textbox = gr.Textbox(label="S3 logs", visible=False)
@@ -208,14 +222,11 @@ with app:

  gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")

- gr.Markdown("Chat with PDF, web page or (new) csv/Excel documents. The default is a small model (Qwen 2 0.5B), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative (Phi 3.5 Mini (larger, slow)), can reason a little better, but is much slower (See Advanced tab).\n\nBy default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
-
- with gr.Accordion(label="Use Gemini or AWS Claude model", open=False, visible=False):
- api_model_choice = gr.Dropdown(value = "None", choices = ["gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25", "anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0", "None"], label="LLM model to use", multiselect=False, interactive=True, visible=False)
- in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=False)
+ gr.Markdown(f"""Chat with PDF, web page or (new) csv/Excel documents. The default is a small model ({SMALL_MODEL_NAME}), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative ({LARGE_MODEL_NAME}) can reason a little better, but is much slower (see the Advanced settings tab).\n\nBy default '[{DEFAULT_DATA_SOURCE_NAME}]({DEFAULT_DATA_SOURCE})' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive in any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.""")
+

  with gr.Row():
- current_source = gr.Textbox(label="Current data source(s)", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf", scale = 10)
+ current_source = gr.Textbox(label="Current data source(s)", value=DEFAULT_DATA_SOURCE, scale = 10)
  current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)

  with gr.Tab("Chatbot"):
@@ -234,17 +245,12 @@ with app:
  with gr.Row():
  submit = gr.Button(value="Send message", variant="primary", scale = 4)
  clear = gr.Button(value="Clear chat", variant="secondary", scale=1)
- stop = gr.Button(value="Stop generating", variant="secondary", scale=1)
-
- examples_set = gr.Radio(label="Examples for the Lambeth Borough Plan",
- #value = "What were the five pillars of the previous borough plan?",
- choices=["What were the five pillars of the previous borough plan?",
- "What is the vision statement for Lambeth?",
- "What are the commitments for Lambeth?",
- "What are the 2030 outcomes for Lambeth?"])
-
- current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here", visible=False)
+ stop = gr.Button(value="Stop generating", variant="stop", scale=1)

+ examples_set = gr.Radio(label="Example questions",
+ choices=default_examples_set)
+
+ current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here", visible=False)

  with gr.Tab("Load in a different file to chat with"):
  with gr.Accordion("PDF file", open = False):
@@ -270,7 +276,8 @@ with app:
  out_passages = gr.Slider(minimum=1, value = 2, maximum=10, step=1, label="Choose number of passages to retrieve from the document. Numbers greater than 2 may lead to increased hallucinations or input text being truncated.")
  temp_slide = gr.Slider(minimum=0.1, value = 0.5, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
  with gr.Row():
- model_choice = gr.Radio(label="Choose a chat model", value="Qwen 2 0.5B (small, fast)", choices = ["Qwen 2 0.5B (small, fast)", "Phi 3.5 Mini (larger, slow)", "gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25", "anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"])
+ model_choice = gr.Radio(label="Choose a chat model", value=SMALL_MODEL_NAME, choices = default_model_choices)
+ in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=True)
  change_model_button = gr.Button(value="Load model", scale=0)
  with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
  gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
@@ -278,28 +285,27 @@ with app:
  load_text = gr.Text(label="Load status")

  gr.HTML(
- "<center>This app is based on the models Qwen 2 0.5B and Phi 3.5 Mini. It powered by Gradio, Transformers, and Llama.cpp.</a></center>"
+ "<center>This app is powered by Gradio, Transformers, and Llama.cpp.</center>"
  )

  examples_set.change(fn=chatf.update_message, inputs=[examples_set], outputs=[message])

-
  ###
  # CHAT PAGE
  ###

  # Click to send message
- response_click = submit.click(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state, out_passages, api_model_choice, in_api_key], outputs=[chat_history_state, sources, instruction_prompt_out, relevant_query_state], queue=False, api_name="retrieval").\
+ response_click = submit.click(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_model_object_state, model_type_state, out_passages, in_api_key], outputs=[chat_history_state, sources, instruction_prompt_out, relevant_query_state], queue=False, api_name="retrieval").\
  success(chatf.turn_off_interactivity, inputs=None, outputs=[message, submit], queue=False).\
- success(chatf.produce_streaming_answer_chatbot, inputs=[chatbot, instruction_prompt_out, model_type_state, temp_slide, relevant_query_state, chat_history_state], outputs=chatbot)
+ success(chatf.produce_streaming_answer_chatbot, inputs=[chatbot, instruction_prompt_out, model_type_state, temp_slide, relevant_query_state, chat_history_state, in_api_key], outputs=chatbot)
  response_click.success(chatf.highlight_found_text, [chatbot, sources], [sources]).\
  success(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
  success(lambda: chatf.restore_interactivity(), None, [message, submit], queue=False)

  # Press enter to send message
- response_enter = message.submit(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state, out_passages, api_model_choice, in_api_key], outputs=[chat_history_state, sources, instruction_prompt_out, relevant_query_state], queue=False).\
+ response_enter = message.submit(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_model_object_state, model_type_state, out_passages, in_api_key], outputs=[chat_history_state, sources, instruction_prompt_out, relevant_query_state], queue=False).\
  success(chatf.turn_off_interactivity, inputs=None, outputs=[message, submit], queue=False).\
- success(chatf.produce_streaming_answer_chatbot, [chatbot, instruction_prompt_out, model_type_state, temp_slide, relevant_query_state, chat_history_state], chatbot)
+ success(chatf.produce_streaming_answer_chatbot, [chatbot, instruction_prompt_out, model_type_state, temp_slide, relevant_query_state, chat_history_state, in_api_key], chatbot)
  response_enter.success(chatf.highlight_found_text, [chatbot, sources], [sources]).\
  success(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
  success(lambda: chatf.restore_interactivity(), None, [message, submit], queue=False)
@@ -322,19 +328,19 @@ with app:
  # Load in a pdf
  load_pdf_click = load_pdf.click(ing.parse_file, inputs=[in_pdf], outputs=[ingest_text, current_source]).\
  success(ing.text_to_docs, inputs=[ingest_text], outputs=[ingest_docs]).\
- success(embed_faiss_save_to_zip, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
+ success(embed_faiss_save_to_zip, inputs=[ingest_docs, output_folder_textbox, embeddings_model_object_state], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
  success(chatf.hide_block, outputs = [examples_set])

  # Load in a webpage
  load_web_click = load_web.click(ing.parse_html, inputs=[in_web, in_div], outputs=[ingest_text, ingest_metadata, current_source]).\
  success(ing.html_text_to_docs, inputs=[ingest_text, ingest_metadata], outputs=[ingest_docs]).\
- success(embed_faiss_save_to_zip, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
+ success(embed_faiss_save_to_zip, inputs=[ingest_docs, output_folder_textbox, embeddings_model_object_state], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
  success(chatf.hide_block, outputs = [examples_set])

  # Load in a csv/excel file
  load_csv_click = load_csv.click(ing.parse_csv_or_excel, inputs=[in_csv, in_text_column], outputs=[ingest_text, current_source]).\
  success(ing.csv_excel_text_to_docs, inputs=[ingest_text, in_text_column], outputs=[ingest_docs]).\
- success(embed_faiss_save_to_zip, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
+ success(embed_faiss_save_to_zip, inputs=[ingest_docs, output_folder_textbox, embeddings_model_object_state], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
  success(chatf.hide_block, outputs = [examples_set])


@@ -350,9 +356,11 @@ with app:

  ###
  # LOGGING AND ON APP LOAD FUNCTIONS
- ###
- app.load(get_connection_params, inputs=None, outputs=[session_hash_state, s3_output_folder_state, session_hash_textbox]).\
- success(load_model, inputs=[model_type_state, gpu_layer_choice, gpu_config_state, cpu_config_state, torch_device_state], outputs=[model_type_state, load_text, current_model])
+ ###
+ # Load in default model and embeddings for each user
+ app.load(get_connection_params, inputs=None, outputs=[session_hash_state, output_folder_textbox, session_hash_textbox, input_folder_textbox]).\
+ success(load_model, inputs=[model_type_state, gpu_layer_choice, gpu_config_state, cpu_config_state, torch_device_state], outputs=[model_type_state, load_text, current_model]).\
+ success(get_faiss_store, inputs=[default_embeddings_store_text, embeddings_model_object_state], outputs=[vectorstore_state])

  # Log usernames and times of access to file (to know who is using the app when running on AWS)
  access_callback = gr.CSVLogger()
@@ -362,7 +370,7 @@ with app:
  success(fn = upload_file_to_s3, inputs=[access_logs_state, access_s3_logs_loc_state], outputs=[s3_logs_output_textbox])

  if __name__ == "__main__":
- if os.environ['COGNITO_AUTH'] == "1":
- app.queue().launch(show_error=True, auth=authenticate_user, max_file_size='50mb')
+ if COGNITO_AUTH == "1":
+ app.queue(max_size=int(MAX_QUEUE_SIZE), default_concurrency_limit=int(DEFAULT_CONCURRENCY_LIMIT)).launch(show_error=True, inbrowser=True, auth=authenticate_user, max_file_size=MAX_FILE_SIZE, server_port=GRADIO_SERVER_PORT, root_path=ROOT_PATH)
  else:
- app.queue().launch(show_error=True, inbrowser=True, max_file_size='50mb')
+ app.queue(max_size=int(MAX_QUEUE_SIZE), default_concurrency_limit=int(DEFAULT_CONCURRENCY_LIMIT)).launch(show_error=True, inbrowser=True, max_file_size=MAX_FILE_SIZE, server_port=GRADIO_SERVER_PORT, root_path=ROOT_PATH)
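Note on the new list-valued settings: app.py now turns DEFAULT_EXAMPLES and DEFAULT_MODEL_CHOICES from strings into Python lists with eval(). The minimal sketch below shows the same parsing step using ast.literal_eval, which is a functionally equivalent but safer alternative for strings that are plain list literals; the swap is an editorial suggestion, not part of the commit, and the sample value simply mirrors the default in chatfuncs/config.py.

# Sketch only: parse a list-valued config string without eval().
import ast

DEFAULT_EXAMPLES = '["How can I make a custom deny list?", "How can I find page duplicates?"]'

# literal_eval only accepts Python literals, so arbitrary code in the string raises ValueError.
default_examples_set = ast.literal_eval(DEFAULT_EXAMPLES)
assert isinstance(default_examples_set, list)
print(default_examples_set[0])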
chatfuncs/chatfuncs.py CHANGED
@@ -5,21 +5,26 @@ from typing import Type, Dict, List, Tuple
  import time
  from itertools import compress
  import pandas as pd
- import numpy as np
  import google.generativeai as ai
+ import gradio as gr
  from gradio import Progress
  import boto3
  import json
+ from nltk.corpus import stopwords
+ from nltk.tokenize import RegexpTokenizer
+ from nltk.stem import WordNetLemmatizer
+ from keybert import KeyBERT

+ # For Name Entity Recognition model
+ #from span_marker import SpanMarkerModel # Not currently used
+
+ # For BM25 retrieval
+ import bm25s
+ import Stemmer
  # Model packages
  import torch.cuda
  from threading import Thread
  from transformers import pipeline, TextIteratorStreamer
- from langchain_huggingface import HuggingFaceEmbeddings
-
- # Alternative model sources
- #from dataclasses import asdict, dataclass
-
  # Langchain functions
  from langchain.prompts import PromptTemplate
  from langchain_community.vectorstores import FAISS
@@ -27,13 +32,13 @@ from langchain_community.retrievers import SVMRetriever
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.docstore.document import Document

- from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION
+ from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma
+ from chatfuncs.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
+ from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME

  model_object = [] # Define empty list for model functions to run
  tokenizer = [] # Define empty list for model functions to run

- from chatfuncs.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
-
  # ResponseObject class for AWS Bedrock calls
  class ResponseObject:
  def __init__(self, text, usage_metadata):
@@ -42,30 +47,12 @@ class ResponseObject:

  bedrock_runtime = boto3.client('bedrock-runtime', region_name=AWS_DEFAULT_REGION)

- # For keyword extraction (not currently used)
- #import nltk
- #nltk.download('wordnet')
- from nltk.corpus import stopwords
- from nltk.tokenize import RegexpTokenizer
- from nltk.stem import WordNetLemmatizer
- from keybert import KeyBERT
-
- # For Name Entity Recognition model
- #from span_marker import SpanMarkerModel # Not currently used
-
- # For BM25 retrieval
- import bm25s
- import Stemmer
-
- from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca
-
- import gradio as gr
-
  torch.cuda.empty_cache()

  PandasDataFrame = Type[pd.DataFrame]

  embeddings = None # global variable setup
+ embeddings_model = None # global variable setup
  vectorstore = None # global variable setup
  model_type = None # global variable setup

@@ -73,7 +60,6 @@ max_memory_length = 0 # How long should the memory of the conversation last?

  source_texts = "" # Define dummy source text (full text) just to enable highlight function to load

-
  ## Highlight text constants
  hlt_chunk_size = 12
  hlt_strat = [" ", ". ", "! ", "? ", ": ", "\n\n", "\n", ", "]
@@ -88,37 +74,51 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL

  # Vectorstore funcs

- def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
+ # def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):

- print(f"> Total split documents: {len(docs_out)}")
+ # print(f"> Total split documents: {len(docs_out)}")

- vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
+ # vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)

- '''
- #with open("vectorstore.pkl", "wb") as f:
- #pickle.dump(vectorstore, f)
- '''
+ # '''
+ # #with open("vectorstore.pkl", "wb") as f:
+ # #pickle.dump(vectorstore, f)
+ # '''
+
+ # #if Path(save_to).exists():
+ # # vectorstore_func.save_local(folder_path=save_to)
+ # #else:
+ # # os.mkdir(save_to)
+ # # vectorstore_func.save_local(folder_path=save_to)
+
+ # global vectorstore
+
+ # vectorstore = vectorstore_func
+
+ # out_message = "Document processing complete"
+
+ # #print(out_message)
+ # #print(f"> Saved to: {save_to}")
+
+ # return out_message

- #if Path(save_to).exists():
- # vectorstore_func.save_local(folder_path=save_to)
- #else:
- # os.mkdir(save_to)
- # vectorstore_func.save_local(folder_path=save_to)
+ # def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings_model=embeddings_model):

- global vectorstore
+ # print(f"> Total split documents: {len(docs_out)}")

- vectorstore = vectorstore_func
+ # print(docs_out)

- out_message = "Document processing complete"
+ # vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings_model)

- #print(out_message)
- #print(f"> Saved to: {save_to}")
+ # vectorstore = vectorstore_func

- return out_message
+ # out_message = "Document processing complete"
+
+ # return out_message, vectorstore_func

  # Prompt functions

- def base_prompt_templates(model_type:str = "Qwen 2 0.5B (small, fast)"):
+ def base_prompt_templates(model_type:str = SMALL_MODEL_NAME):

  #EXAMPLE_PROMPT = PromptTemplate(
  # template="\nCONTENT:\n\n{page_content}\n\nSOURCE: {source}\n\n",
@@ -132,9 +132,9 @@ def base_prompt_templates(model_type:str = "Qwen 2 0.5B (small, fast)"):

  # The main prompt:

- if model_type == "Qwen 2 0.5B (small, fast)":
+ if model_type == SMALL_MODEL_NAME:
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_qwen, input_variables=['question', 'summaries'])
- elif model_type == "Phi 3.5 Mini (larger, slow)":
+ elif model_type == LARGE_MODEL_NAME:
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_phi3, input_variables=['question', 'summaries'])
  else:
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_orca, input_variables=['question', 'summaries'])
@@ -146,7 +146,7 @@ def write_out_metadata_as_string(metadata_in:str):
  metadata_string = [f"{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}" for d in metadata_in] # ['metadata']
  return metadata_string

- def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, content_prompt:str, extracted_memory:list, vectorstore:object, embeddings:object, relevant_flag:bool = True, out_passages:int = 2): # ,
+ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, content_prompt:str, extracted_memory:list, vectorstore:object, embeddings:object, relevant_flag:bool = True, out_passages:int = 2, total_output_passage_chunks_size:int=5): # ,

  question = inputs["question"]
  chat_history = inputs["chat_history"]
@@ -172,7 +172,7 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, con

  # Only expand passages if not tabular data
  if (file_type != ".csv") & (file_type != ".xlsx"):
- docs_keep_as_doc, doc_df = get_expanded_passages(vectorstore, docs_keep_out, width=3)
+ docs_keep_as_doc, doc_df = get_expanded_passages(vectorstore, docs_keep_out, width=total_output_passage_chunks_size)

  # Build up sources content to add to user display
  doc_df['meta_clean'] = write_out_metadata_as_string(doc_df["metadata"]) # [f"<b>{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}</b>" for d in doc_df['metadata']]
@@ -188,9 +188,6 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, con
  sources_docs_content_string = '<br><br>'.join(doc_df['content_meta'])#.replace("  "," ")#.strip()

  instruction_prompt_out = instruction_prompt.format(question=new_question_kworded, summaries=docs_content_string)
-
- print('Final prompt is: ')
- print(instruction_prompt_out)

  return instruction_prompt_out, sources_docs_content_string, new_question_kworded

@@ -201,9 +198,11 @@ def create_full_prompt(user_input:str,
  embeddings:object,
  model_type:str,
  out_passages:list[str],
- api_model_choice=None,
- api_key=None,
- relevant_flag = True):
+ api_key:str="",
+ relevant_flag:bool=True):
+
+ if "gemini" in model_type and not GEMINI_API_KEY and not api_key:
+ raise Exception("Gemini model selected but no API key found. Please enter an API key on the Advanced settings page.")

  #if chain_agent is None:
  # history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
@@ -211,14 +210,6 @@ def create_full_prompt(user_input:str,
  print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")

  history = history or []
-
- if api_model_choice and api_model_choice != "None":
- print("API model choice detected")
- if api_key:
- print("API key detected")
- return history, "", None, relevant_flag
- else:
- return history, "", None, relevant_flag

  # Create instruction prompt
  instruction_prompt, content_prompt = base_prompt_templates(model_type=model_type)
@@ -228,17 +219,12 @@ def create_full_prompt(user_input:str,
  relevant_flag = False
  else:
  relevant_flag = True
-
- print("User input:", user_input)

  instruction_prompt_out, docs_content_string, new_question_kworded =\
  generate_expanded_prompt({"question": user_input, "chat_history": history}, #vectorstore,
  instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings, relevant_flag, out_passages)

  history.append({"metadata":None, "options":None, "role": 'user', "content": user_input})
-
- print("Output history is:", history)
- print("Final prompt to model is:",instruction_prompt_out)

  return history, docs_content_string, instruction_prompt_out, relevant_flag

@@ -457,13 +443,13 @@ def produce_streaming_answer_chatbot(
  temperature:float=temperature,
  relevant_query_bool:bool=True,
  chat_history:list[dict]=[{"metadata":None, "options":None, "role": 'user', "content": ""}],
+ in_api_key:str=GEMINI_API_KEY,
  max_new_tokens:int=max_new_tokens,
  sample:bool=sample,
  repetition_penalty:float=repetition_penalty,
  top_p:float=top_p,
  top_k:float=top_k,
- max_tokens:int=max_tokens,
- in_api_key:str=GEMINI_API_KEY
+ max_tokens:int=max_tokens
  ):
  #print("Model type is: ", model_type)

@@ -483,9 +469,8 @@ def produce_streaming_answer_chatbot(
  yield history
  return

- if model_type == "Qwen 2 0.5B (small, fast)":
+ if model_type == SMALL_MODEL_NAME:

- print("tokenizer:", tokenizer)
  # Get the model and tokenizer, and tokenize the user text.
  model_inputs = tokenizer(text=full_prompt, return_tensors="pt", return_attention_mask=False).to(torch_device)

@@ -503,8 +488,6 @@ def produce_streaming_answer_chatbot(
  top_k=top_k
  )

- print("model_object:", model_object)
-
  t = Thread(target=model_object.generate, kwargs=generate_kwargs)
  t.start()

@@ -521,6 +504,7 @@ def produce_streaming_answer_chatbot(
  new_text = ""
  history[-1]['content'] += new_text
  NUM_TOKENS += 1
+ history[-1]['content'] = history[-1]['content'].replace('<|im_end|>','')
  yield history
  except Exception as e:
  print(f"Error during text generation: {e}")
@@ -533,7 +517,7 @@ def produce_streaming_answer_chatbot(
  print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
  print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')

- elif model_type == "Phi 3.5 Mini (larger, slow)":
+ elif model_type == LARGE_MODEL_NAME:
  #tokens = model.tokenize(full_prompt)

  gen_config = CtransGenGenerationConfig()
@@ -556,6 +540,7 @@ def produce_streaming_answer_chatbot(
  if "choices" in out and len(out["choices"]) > 0 and "text" in out["choices"][0]:
  history[-1]['content'] += out["choices"][0]["text"]
  NUM_TOKENS+=1
+ history[-1]['content'] = history[-1]['content'].replace('<|im_end|>','')
  yield history
  else:
  print(f"Unexpected output structure: {out}")
@@ -602,6 +587,11 @@ def produce_streaming_answer_chatbot(
  yield history

  elif "gemini" in model_type:
+
+ if in_api_key: gemini_api_key = in_api_key
+ elif GEMINI_API_KEY: gemini_api_key = GEMINI_API_KEY
+ else: raise Exception("Gemini API key not found. Please enter a key on the Advanced settings page or select another model type")
+
  print("Using Gemini model:", model_type)
  print("full_prompt:", full_prompt)

@@ -610,7 +600,7 @@ def produce_streaming_answer_chatbot(

  system_prompt = "You are answering questions from the user based on source material. Respond with short, factually correct answers."

- model, config = construct_gemini_generative_model(GEMINI_API_KEY, temperature, model_type, system_prompt, max_tokens)
+ model, config = construct_gemini_generative_model(gemini_api_key, temperature, model_type, system_prompt, max_tokens)

  responses, summary_conversation_history, whole_summary_conversation, whole_conversation_metadata = process_requests(full_prompt, system_prompt, conversation_history=[], whole_conversation=[], whole_conversation_metadata=[], model=model, config = config, model_choice = model_type, temperature = temperature)

@@ -977,13 +967,9 @@ def highlight_found_text(chat_history: list[dict], source_texts: list[dict], hlt
  response_text = next(
  (entry['content'] for entry in reversed(chat_history) if entry.get('role') == 'assistant'),
  "")
-
- print("response_text:", response_text)

  source_texts = extract_text_from_input(source_texts)

- print("source_texts:", source_texts)
-
  text_splitter = RecursiveCharacterTextSplitter(
  chunk_size=hlt_chunk_size,
  separators=hlt_strat,
@@ -1025,8 +1011,6 @@ def highlight_found_text(chat_history: list[dict], source_texts: list[dict], hlt

  out_pos_tokens = "".join(pos_tokens)

- print("out_pos_tokens:", out_pos_tokens)
-
  return out_pos_tokens

chatfuncs/config.py CHANGED
@@ -165,6 +165,8 @@ DISPLAY_FILE_NAMES_IN_LOGS = get_or_create_env_var('DISPLAY_FILE_NAMES_IN_LOGS',
165
  # RUN CONFIG
166
  GEMINI_API_KEY = get_or_create_env_var('GEMINI_API_KEY', '')
167
 
 
 
168
 
169
  # Number of pages to loop through before breaking the function and restarting from the last finished page (not currently activated).
170
  PAGE_BREAK_VALUE = get_or_create_env_var('PAGE_BREAK_VALUE', '99999')
@@ -175,6 +177,37 @@ MAX_TIME_VALUE = get_or_create_env_var('MAX_TIME_VALUE', '999999')
175
  # APP RUN CONFIG
176
  ###
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  # Get some environment variables and Launch the Gradio app
179
  COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')
180
 
 
165
  # RUN CONFIG
166
  GEMINI_API_KEY = get_or_create_env_var('GEMINI_API_KEY', '')
167
 
168
+ HF_TOKEN = get_or_create_env_var('HF_TOKEN', '')
169
+
170
 
171
  # Number of pages to loop through before breaking the function and restarting from the last finished page (not currently activated).
172
  PAGE_BREAK_VALUE = get_or_create_env_var('PAGE_BREAK_VALUE', '99999')
 
177
  # APP RUN CONFIG
178
  ###
179
 
180
+ SMALL_MODEL_NAME = get_or_create_env_var("SMALL_MODEL_NAME", "Gemma 3 1B (small, fast)") # "Qwen 2 0.5B (small, fast)"
181
+
182
+ SMALL_MODEL_REPO_ID = get_or_create_env_var("SMALL_MODEL_REPO_ID", 'google/gemma-3-1b-it') #'Qwen/Qwen2-0.5B-Instruct')
183
+
184
+ LARGE_MODEL_NAME = get_or_create_env_var("LARGE_MODEL_NAME", "Phi 3.5 Mini (larger, slow)")
185
+
186
+ LARGE_MODEL_REPO_ID = get_or_create_env_var("LARGE_MODEL_REPO_ID", "QuantFactory/Phi-3.5-mini-instruct-GGUF") # "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
187
+ LARGE_MODEL_GGUF_FILE = get_or_create_env_var("LARGE_MODEL_GGUF_FILE", "Phi-3.5-mini-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
188
+
189
+ if RUN_AWS_FUNCTIONS == "1":
190
+ default_model_choices = f'["{SMALL_MODEL_NAME}", "{LARGE_MODEL_NAME}", "gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25", "anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"]'
191
+ else:
192
+ default_model_choices = f'["{SMALL_MODEL_NAME}", "{LARGE_MODEL_NAME}", "gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]'
193
+
194
+ DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", default_model_choices)
195
+
196
+ EMBEDDINGS_MODEL_NAME = get_or_create_env_var('EMBEDDINGS_MODEL_NAME', "BAAI/bge-base-en-v1.5") #"mixedbread-ai/mxbai-embed-xsmall-v1"
197
+
198
+ DEFAULT_EMBEDDINGS_LOCATION = get_or_create_env_var('DEFAULT_EMBEDDINGS_LOCATION', "faiss_embedding")
199
+
200
+ DEFAULT_DATA_SOURCE_NAME = get_or_create_env_var('DEFAULT_DATA_SOURCE_NAME', "Document redaction app documentation")
201
+
202
+ DEFAULT_DATA_SOURCE = get_or_create_env_var('DEFAULT_DATA_SOURCE', "https://seanpedrick-case.github.io/doc_redaction/README.html")
203
+
204
+ DEFAULT_EXAMPLES = get_or_create_env_var('DEFAULT_EXAMPLES', '[ "How can I make a custom deny list?", "How can I find page duplicates?", "How can I review and modify existing redactions?", "How can I export my review files to Adobe?"]')
205
+ #
206
+ # ') # ["What were the five pillars of the previous borough plan?",
207
+ #"What is the vision statement for Lambeth?",
208
+ #"What are the commitments for Lambeth?",
209
+ #"What are the 2030 outcomes for Lambeth?"]
210
+
211
  # Get some environment variables and Launch the Gradio app
212
  COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')
213
 
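The new model settings are stored as plain strings so they can be overridden through single environment variables. A hedged sketch of how they might be consumed downstream — parsing DEFAULT_MODEL_CHOICES back into a list and pulling the large GGUF file for llama-cpp-python — assuming only the names added in this commit:

```python
import ast
from huggingface_hub import hf_hub_download
from chatfuncs.config import (DEFAULT_MODEL_CHOICES, LARGE_MODEL_REPO_ID,
                              LARGE_MODEL_GGUF_FILE, HF_TOKEN)

# DEFAULT_MODEL_CHOICES is a JSON-style string such as
# '["Gemma 3 1B (small, fast)", "Phi 3.5 Mini (larger, slow)", ...]'
model_choices = ast.literal_eval(DEFAULT_MODEL_CHOICES)
print(model_choices)

# The large local model ships as a GGUF file; HF_TOKEN is only needed
# for gated repositories, so fall back to anonymous access when unset.
gguf_path = hf_hub_download(
    repo_id=LARGE_MODEL_REPO_ID,
    filename=LARGE_MODEL_GGUF_FILE,
    token=HF_TOKEN or None,
)
print(gguf_path)
```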
chatfuncs/helper_functions.py CHANGED
@@ -1,6 +1,9 @@
1
  import os
2
  import gradio as gr
3
  import pandas as pd
 
 
 
4
 
5
  def get_or_create_env_var(var_name, default_value):
6
  # Get the environment variable if it exists
@@ -13,12 +16,6 @@ def get_or_create_env_var(var_name, default_value):
13
 
14
  return value
15
 
16
- # Retrieving or setting output folder
17
- env_var_name = 'GRADIO_OUTPUT_FOLDER'
18
- default_value = 'output/'
19
-
20
- output_folder = get_or_create_env_var(env_var_name, default_value)
21
- print(f'The value of {env_var_name} is {output_folder}')
22
 
23
  def get_file_path_with_extension(file_path):
24
  # First, get the basename of the file (e.g., "example.txt" from "/path/to/example.txt")
@@ -165,64 +162,129 @@ def wipe_logs(feedback_logs_loc, usage_logs_loc):
165
 
166
 
167
 
168
- async def get_connection_params(request: gr.Request):
169
- base_folder = ""
170
-
171
- if request:
172
- #print("request user:", request.username)
173
-
174
- #request_data = await request.json() # Parse JSON body
175
- #print("All request data:", request_data)
176
- #context_value = request_data.get('context')
177
- #if 'context' in request_data:
178
- # print("Request context dictionary:", request_data['context'])
179
-
180
- # print("Request headers dictionary:", request.headers)
181
- # print("All host elements", request.client)
182
- # print("IP address:", request.client.host)
183
- # print("Query parameters:", dict(request.query_params))
184
- # To get the underlying FastAPI items you would need to use await and some fancy @ stuff for a live query: https://fastapi.tiangolo.com/vi/reference/request/
185
- #print("Request dictionary to object:", request.request.body())
186
- print("Session hash:", request.session_hash)
187
-
188
- # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER
189
- CUSTOM_CLOUDFRONT_HEADER_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER', '')
190
- #print(f'The value of CUSTOM_CLOUDFRONT_HEADER is {CUSTOM_CLOUDFRONT_HEADER_var}')
191
-
192
- # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER_VALUE
193
- CUSTOM_CLOUDFRONT_HEADER_VALUE_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER_VALUE', '')
194
- #print(f'The value of CUSTOM_CLOUDFRONT_HEADER_VALUE_var is {CUSTOM_CLOUDFRONT_HEADER_VALUE_var}')
195
-
196
- if CUSTOM_CLOUDFRONT_HEADER_var and CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
197
- if CUSTOM_CLOUDFRONT_HEADER_var in request.headers:
198
- supplied_cloudfront_custom_value = request.headers[CUSTOM_CLOUDFRONT_HEADER_var]
199
- if supplied_cloudfront_custom_value == CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
200
- print("Custom Cloudfront header found:", supplied_cloudfront_custom_value)
201
  else:
202
- raise(ValueError, "Custom Cloudfront header value does not match expected value.")
203
 
204
- # Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
205
-
206
- if request.username:
207
- out_session_hash = request.username
208
- base_folder = "user-files/"
209
- print("Request username found:", out_session_hash)
210
 
211
- elif 'x-cognito-id' in request.headers:
212
- out_session_hash = request.headers['x-cognito-id']
213
- base_folder = "user-files/"
214
- print("Cognito ID found:", out_session_hash)
215
 
216
- else:
217
- out_session_hash = request.session_hash
218
- base_folder = "temp-files/"
219
- # print("Cognito ID not found. Using session hash as save folder:", out_session_hash)
220
 
221
- output_folder = base_folder + out_session_hash + "/"
222
- #if bucket_name:
223
- # print("S3 output folder is: " + "s3://" + bucket_name + "/" + output_folder)
224
 
225
- return out_session_hash, output_folder, out_session_hash
226
- else:
227
- print("No session parameters found.")
228
- return "",""
 
1
  import os
2
  import gradio as gr
3
  import pandas as pd
4
+ import boto3
5
+ from botocore.exceptions import ClientError
6
+ from chatfuncs.config import CUSTOM_HEADER_VALUE, CUSTOM_HEADER, OUTPUT_FOLDER, INPUT_FOLDER, SESSION_OUTPUT_FOLDER, AWS_USER_POOL_ID
7
 
8
  def get_or_create_env_var(var_name, default_value):
9
  # Get the environment variable if it exists
 
16
 
17
  return value
18
 
19
 
20
  def get_file_path_with_extension(file_path):
21
  # First, get the basename of the file (e.g., "example.txt" from "/path/to/example.txt")
 
162
 
163
 
164
 
165
+ # async def get_connection_params(request: gr.Request):
166
+ # base_folder = ""
167
+
168
+ # if request:
169
+ # #print("request user:", request.username)
170
+
171
+ # #request_data = await request.json() # Parse JSON body
172
+ # #print("All request data:", request_data)
173
+ # #context_value = request_data.get('context')
174
+ # #if 'context' in request_data:
175
+ # # print("Request context dictionary:", request_data['context'])
176
+
177
+ # # print("Request headers dictionary:", request.headers)
178
+ # # print("All host elements", request.client)
179
+ # # print("IP address:", request.client.host)
180
+ # # print("Query parameters:", dict(request.query_params))
181
+ # # To get the underlying FastAPI items you would need to use await and some fancy @ stuff for a live query: https://fastapi.tiangolo.com/vi/reference/request/
182
+ # #print("Request dictionary to object:", request.request.body())
183
+ # print("Session hash:", request.session_hash)
184
+
185
+ # # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER
186
+ # CUSTOM_CLOUDFRONT_HEADER_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER', '')
187
+ # #print(f'The value of CUSTOM_CLOUDFRONT_HEADER is {CUSTOM_CLOUDFRONT_HEADER_var}')
188
+
189
+ # # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER_VALUE
190
+ # CUSTOM_CLOUDFRONT_HEADER_VALUE_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER_VALUE', '')
191
+ # #print(f'The value of CUSTOM_CLOUDFRONT_HEADER_VALUE_var is {CUSTOM_CLOUDFRONT_HEADER_VALUE_var}')
192
+
193
+ # if CUSTOM_CLOUDFRONT_HEADER_var and CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
194
+ # if CUSTOM_CLOUDFRONT_HEADER_var in request.headers:
195
+ # supplied_cloudfront_custom_value = request.headers[CUSTOM_CLOUDFRONT_HEADER_var]
196
+ # if supplied_cloudfront_custom_value == CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
197
+ # print("Custom Cloudfront header found:", supplied_cloudfront_custom_value)
198
+ # else:
199
+ # raise(ValueError, "Custom Cloudfront header value does not match expected value.")
200
+
201
+ # # Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
202
+
203
+ # if request.username:
204
+ # out_session_hash = request.username
205
+ # base_folder = "user-files/"
206
+ # print("Request username found:", out_session_hash)
207
+
208
+ # elif 'x-cognito-id' in request.headers:
209
+ # out_session_hash = request.headers['x-cognito-id']
210
+ # base_folder = "user-files/"
211
+ # print("Cognito ID found:", out_session_hash)
212
+
213
+ # else:
214
+ # out_session_hash = request.session_hash
215
+ # base_folder = "temp-files/"
216
+ # # print("Cognito ID not found. Using session hash as save folder:", out_session_hash)
217
+
218
+ # output_folder = base_folder + out_session_hash + "/"
219
+ # #if bucket_name:
220
+ # # print("S3 output folder is: " + "s3://" + bucket_name + "/" + output_folder)
221
+
222
+ # return out_session_hash, output_folder, out_session_hash
223
+ # else:
224
+ # print("No session parameters found.")
225
+ # return "",""
226
+
227
+ async def get_connection_params(request: gr.Request,
228
+ output_folder_textbox:str=OUTPUT_FOLDER,
229
+ input_folder_textbox:str=INPUT_FOLDER,
230
+ session_output_folder:str=SESSION_OUTPUT_FOLDER):
231
+
232
+ #print("Session hash:", request.session_hash)
233
+
234
+ if CUSTOM_HEADER and CUSTOM_HEADER_VALUE:
235
+ if CUSTOM_HEADER in request.headers:
236
+ supplied_custom_header_value = request.headers[CUSTOM_HEADER]
237
+ if supplied_custom_header_value == CUSTOM_HEADER_VALUE:
238
+ print("Custom header supplied and matches CUSTOM_HEADER_VALUE")
239
  else:
240
+ print("Custom header value does not match expected value.")
241
+ raise ValueError("Custom header value does not match expected value.")
242
+ else:
243
+ print("Custom header value not found.")
244
+ raise ValueError("Custom header value not found.")
245
+
246
+ # Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
247
+
248
+ if request.username:
249
+ out_session_hash = request.username
250
+ #print("Request username found:", out_session_hash)
251
+
252
+ elif 'x-cognito-id' in request.headers:
253
+ out_session_hash = request.headers['x-cognito-id']
254
+ #print("Cognito ID found:", out_session_hash)
255
+
256
+ elif 'x-amzn-oidc-identity' in request.headers:
257
+ out_session_hash = request.headers['x-amzn-oidc-identity']
258
+
259
+ # Fetch email address using Cognito client
260
+ cognito_client = boto3.client('cognito-idp')
261
+ try:
262
+ response = cognito_client.admin_get_user(
263
+ UserPoolId=AWS_USER_POOL_ID, # Replace with your User Pool ID
264
+ Username=out_session_hash
265
+ )
266
+ email = next(attr['Value'] for attr in response['UserAttributes'] if attr['Name'] == 'email')
267
+ #print("Email address found:", email)
268
+
269
+ out_session_hash = email
270
+ except ClientError as e:
271
+ print("Error fetching user details:", e)
272
+ email = None
273
+
274
+ print("Cognito ID found:", out_session_hash)
275
 
276
+ else:
277
+ out_session_hash = request.session_hash
 
 
 
 
278
 
279
+ if session_output_folder == 'True':
280
+ output_folder = output_folder_textbox + out_session_hash + "/"
281
+ input_folder = input_folder_textbox + out_session_hash + "/"
 
282
 
283
+ else:
284
+ output_folder = output_folder_textbox
285
+ input_folder = input_folder_textbox
 
286
 
287
+ if not os.path.exists(output_folder): os.mkdir(output_folder)
288
+ if not os.path.exists(input_folder): os.mkdir(input_folder)
 
289
 
290
+ return out_session_hash, output_folder, out_session_hash, input_folder
 
 
 
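The rewritten get_connection_params() now returns a session identifier plus per-session output and input folders, driven by the new config flags. A sketch of how it might be wired into a Gradio Blocks app via demo.load; the component names below are illustrative, not the app's real ones:

```python
import gradio as gr
from chatfuncs.helper_functions import get_connection_params

with gr.Blocks() as demo:
    session_hash_state = gr.State()
    output_folder_state = gr.State("output/")
    input_folder_state = gr.State("input/")
    session_hash_textbox = gr.Textbox(visible=False)

    # gr.Request is injected automatically because the function's first
    # parameter is annotated as gr.Request; the remaining inputs map to
    # output_folder_textbox and input_folder_textbox, while
    # session_output_folder falls back to its default.
    demo.load(
        get_connection_params,
        inputs=[output_folder_state, input_folder_state],
        outputs=[session_hash_state, output_folder_state,
                 session_hash_textbox, input_folder_state],
    )
```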
chatfuncs/ingest.py CHANGED
@@ -7,13 +7,14 @@ import requests
7
  import pandas as pd
8
  import dateutil.parser
9
  from typing import Type, List
10
- import shutil
11
 
12
- from langchain_community.embeddings import HuggingFaceEmbeddings # HuggingFaceInstructEmbeddings,
13
  from langchain_community.vectorstores.faiss import FAISS
14
  #from langchain_community.vectorstores import Chroma
15
  from langchain.text_splitter import RecursiveCharacterTextSplitter
16
  from langchain.docstore.document import Document
 
17
 
18
  from bs4 import BeautifulSoup
19
  from docx import Document as Doc
@@ -557,31 +558,24 @@ def docs_elements_from_csv_save(docs_path="documents.csv"):
557
 
558
  # ## Create embeddings and save faiss vector store to the path specified in `save_to`
559
 
560
- def load_embeddings(model_name = "BAAI/bge-base-en-v1.5"):
561
 
562
- #if model_name == "hkunlp/instructor-large":
563
- # embeddings_func = HuggingFaceInstructEmbeddings(model_name=model_name,
564
- # embed_instruction="Represent the paragraph for retrieval: ",
565
- # query_instruction="Represent the question for retrieving supporting documents: "
566
- # )
567
 
568
- #else:
569
- embeddings_func = HuggingFaceEmbeddings(model_name=model_name)
570
 
571
- global embeddings
572
 
573
- embeddings = embeddings_func
574
 
575
- return embeddings_func
576
-
577
- def embed_faiss_save_to_zip(docs_out, save_to="output", model_name="BAAI/bge-base-en-v1.5"):
578
- load_embeddings(model_name=model_name)
579
 
580
  print(f"> Total split documents: {len(docs_out)}")
581
 
582
- vectorstore = FAISS.from_documents(documents=docs_out, embedding=embeddings)
583
 
584
- save_to_path = Path(save_to)
585
  save_to_path.mkdir(parents=True, exist_ok=True)
586
 
587
  vectorstore.save_local(folder_path=str(save_to_path))
@@ -619,20 +613,20 @@ def embed_faiss_save_to_zip(docs_out, save_to="output", model_name="BAAI/bge-bas
619
 
620
 
621
 
622
- def sim_search_local_saved_vec(query, k_val, save_to="faiss_lambeth_census_embedding"):
623
 
624
- load_embeddings()
625
 
626
- docsearch = FAISS.load_local(folder_path=save_to, embeddings=embeddings)
627
 
628
 
629
- display(Markdown(question))
630
 
631
- search = docsearch.similarity_search_with_score(query, k=k_val)
632
 
633
- for item in search:
634
- print(item[0].page_content)
635
- print(f"Page: {item[0].metadata['source']}")
636
- print(f"Date: {item[0].metadata['date']}")
637
- print(f"Score: {item[1]}")
638
- print("---")
 
7
  import pandas as pd
8
  import dateutil.parser
9
  from typing import Type, List
10
+ #import shutil
11
 
12
+ #from langchain_community.embeddings import HuggingFaceEmbeddings # HuggingFaceInstructEmbeddings,
13
  from langchain_community.vectorstores.faiss import FAISS
14
  #from langchain_community.vectorstores import Chroma
15
  from langchain.text_splitter import RecursiveCharacterTextSplitter
16
  from langchain.docstore.document import Document
17
+ #from chatfuncs.config import EMBEDDINGS_MODEL_NAME
18
 
19
  from bs4 import BeautifulSoup
20
  from docx import Document as Doc
 
558
 
559
  # ## Create embeddings and save faiss vector store to the path specified in `save_to`
560
 
561
+ # def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):
562
 
563
+ # embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)
 
 
 
 
564
 
565
+ # #global embeddings
 
566
 
567
+ # #embeddings = embeddings_func
568
 
569
+ # return embeddings_func
570
 
571
+ def embed_faiss_save_to_zip(docs_out, save_folder, embeddings_model_object, save_to="faiss_embeddings", model_name="BAAI/bge-base-en-v1.5"):
572
+ #load_embeddings(model_name=model_name)
 
 
573
 
574
  print(f"> Total split documents: {len(docs_out)}")
575
 
576
+ vectorstore = FAISS.from_documents(documents=docs_out, embedding=embeddings_model_object)
577
 
578
+ save_to_path = Path(save_folder, save_to)
579
  save_to_path.mkdir(parents=True, exist_ok=True)
580
 
581
  vectorstore.save_local(folder_path=str(save_to_path))
 
613
 
614
 
615
 
616
+ # def sim_search_local_saved_vec(query, k_val, save_to="faiss_lambeth_census_embedding"):
617
 
618
+ # load_embeddings()
619
 
620
+ # docsearch = FAISS.load_local(folder_path=save_to, embeddings=embeddings)
621
 
622
 
623
+ # display(Markdown(question))
624
 
625
+ # search = docsearch.similarity_search_with_score(query, k=k_val)
626
 
627
+ # for item in search:
628
+ # print(item[0].page_content)
629
+ # print(f"Page: {item[0].metadata['source']}")
630
+ # print(f"Date: {item[0].metadata['date']}")
631
+ # print(f"Score: {item[1]}")
632
+ # print("---")
chatfuncs/prompts.py CHANGED
@@ -71,4 +71,9 @@ Answer the QUESTION using information from the following CONTENT. Respond with s
71
  CONTENT: {summaries}
72
  QUESTION: {question}\n
73
  Answer:<|im_end|>
74
- <|im_start|>assistant\n"""
71
  CONTENT: {summaries}
72
  QUESTION: {question}\n
73
  Answer:<|im_end|>
74
+ <|im_start|>assistant\n"""
75
+
76
+ instruction_prompt_gemma = """Answer the QUESTION using information from the following CONTENT. Respond with short answers that directly answer the question.
77
+ CONTENT: {summaries}
78
+ QUESTION: {question}
79
+ assistant:"""
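The new Gemma instruction prompt keeps the same {summaries} and {question} placeholders as the other templates, so it can be filled with str.format(), for example:

```python
from chatfuncs.prompts import instruction_prompt_gemma

# Illustrative values - the app supplies retrieved chunks as `summaries`
# and the user's query as `question`.
prompt = instruction_prompt_gemma.format(
    summaries="The deny list page explains how to upload a CSV of terms to redact...",
    question="How can I make a custom deny list?",
)
print(prompt)
```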
faiss_embedding/faiss_embedding.zip CHANGED
Binary files a/faiss_embedding/faiss_embedding.zip and b/faiss_embedding/faiss_embedding.zip differ
 
requirements.txt CHANGED
@@ -4,7 +4,7 @@ langchain-community==0.3.22
4
  beautifulsoup4==4.13.4
5
  google-generativeai==0.8.5
6
  pandas==2.2.3
7
- transformers==4.41.2
8
  # For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
9
  llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
10
  #-C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 
4
  beautifulsoup4==4.13.4
5
  google-generativeai==0.8.5
6
  pandas==2.2.3
7
+ transformers==4.51.3
8
  # For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
9
  llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
10
  #-C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
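The transformers bump from 4.41.2 to 4.51.3 is presumably what allows the new Gemma 3 1B base model to load. A minimal, illustrative sketch of loading and querying the checkpoint directly with transformers (the repo is gated, so a Hugging Face token or login may be required); this is not necessarily the app's exact load path:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "google/gemma-3-1b-it"

# Gated checkpoint: pass token=... or run `huggingface-cli login` first.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

messages = [{"role": "user", "content": "How can I make a custom deny list?"}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True,
                                          return_tensors="pt")
output_ids = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```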