seanpedrickcase committed
Commit 03afd76 · 1 Parent(s): ee7464e

Refocused the app on working with the Document Redaction guide. Added Gemma 3 1B as the base model. Various UI and config improvements.

.dockerignore CHANGED
@@ -9,4 +9,6 @@ bootstrapper.py
  build/*
  dist/*
  test/*
- config/*
+ config/*
+ output/*
+ input/*
.gitattributes ADDED
@@ -0,0 +1 @@
+ *.zip filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -9,4 +9,6 @@ bootstrapper.py
  build/*
  dist/*
  test/*
- config/*
+ config/*
+ output/*
+ input/*
app.py CHANGED
@@ -4,18 +4,21 @@ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
  import gradio as gr
  import pandas as pd
+ from torch import float16
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
+ import zipfile

  from chatfuncs.ingest import embed_faiss_save_to_zip

- from chatfuncs.helper_functions import ensure_output_folder_exists, get_connection_params, output_folder, reveal_feedback_buttons, wipe_logs
+ from chatfuncs.helper_functions import get_connection_params, reveal_feedback_buttons, wipe_logs
  from chatfuncs.aws_functions import upload_file_to_s3
  from chatfuncs.auth import authenticate_user
- from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH
-
- from llama_cpp import Llama
- from huggingface_hub import hf_hub_download
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
- import os
+ from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES
+ from chatfuncs.model_load import torch_device, gpu_config, cpu_config, context_length
+ import chatfuncs.chatfuncs as chatf
+ import chatfuncs.ingest as ing

  PandasDataFrame = Type[pd.DataFrame]

@@ -27,83 +30,93 @@ access_logs_data_folder = ACCESS_LOGS_FOLDER
  feedback_data_folder = FEEDBACK_LOGS_FOLDER
  usage_data_folder = USAGE_LOGS_FOLDER

+ if isinstance(DEFAULT_EXAMPLES, str): default_examples_set = eval(DEFAULT_EXAMPLES)
+ if isinstance(DEFAULT_MODEL_CHOICES, str): default_model_choices = eval(DEFAULT_MODEL_CHOICES)
+
  # Disable cuda devices if necessary
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

- import chatfuncs.ingest as ing

  ###
  # Load preset embeddings, vectorstore, and model
  ###

- embeddings_name = "BAAI/bge-base-en-v1.5" #"mixedbread-ai/mxbai-embed-xsmall-v1"
+ def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):

- def load_embeddings(embeddings_name = embeddings_name):
+ embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)

- embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_name)
+ #global embeddings

- global embeddings
+ #embeddings = embeddings_func

- embeddings = embeddings_func
+ return embeddings_func

- return embeddings
+ def get_faiss_store(faiss_vstore_folder:str, embeddings_model:object):

- def get_faiss_store(faiss_vstore_folder,embeddings):
- import zipfile
  with zipfile.ZipFile(faiss_vstore_folder + '/' + faiss_vstore_folder + '.zip', 'r') as zip_ref:
  zip_ref.extractall(faiss_vstore_folder)

- faiss_vstore = FAISS.load_local(folder_path=faiss_vstore_folder, embeddings=embeddings, allow_dangerous_deserialization=True)
+ faiss_vstore = FAISS.load_local(folder_path=faiss_vstore_folder, embeddings=embeddings_model, allow_dangerous_deserialization=True)
  os.remove(faiss_vstore_folder + "/index.faiss")
  os.remove(faiss_vstore_folder + "/index.pkl")

- global vectorstore
+ #global vectorstore

- vectorstore = faiss_vstore
+ #vectorstore = faiss_vstore

- return vectorstore
+ return faiss_vstore #vectorstore

- import chatfuncs.chatfuncs as chatf
- from chatfuncs.model_load import torch_device, gpu_config, cpu_config, context_length
+ # Load in default embeddings and embeddings model name
+ embeddings_model = load_embeddings_model(EMBEDDINGS_MODEL_NAME)
+ vectorstore = get_faiss_store(faiss_vstore_folder=DEFAULT_EMBEDDINGS_LOCATION,embeddings_model=embeddings_model)#globals()["embeddings"])

- chatf.embeddings = load_embeddings(embeddings_name)
- chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
+ chatf.embeddings = embeddings_model
+ chatf.vectorstore = vectorstore

- def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
+ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings_model=embeddings_model):

  print(f"> Total split documents: {len(docs_out)}")

  print(docs_out)

- vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
+ vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings_model)

  chatf.vectorstore = vectorstore_func

  out_message = "Document processing complete"

  return out_message, vectorstore_func
- # Gradio chat
+

- def create_hf_model(model_name:str):
+ def create_hf_model(model_name:str, hf_token=HF_TOKEN):
  if torch_device == "cuda":
  if "flan" in model_name:
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")#, torch_dtype=torch.float16)
  else:
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")#, torch_dtype=torch.float16)
+ if hf_token:
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", token=hf_token) # , torch_dtype=float16
+ else:
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto") # , torch_dtype=float16
  else:
  if "flan" in model_name:
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)#, torch_dtype=torch.float16)
- else:
- model = AutoModelForCausalLM.from_pretrained(model_name)#, trust_remote_code=True)#, torch_dtype=torch.float16)
+ else:
+ if hf_token:
+ model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token) # , torch_dtype=float16
+ else:
+ model = AutoModelForCausalLM.from_pretrained(model_name) # , torch_dtype=float16

- tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = context_length)
+ if hf_token:
+ tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = context_length, token=hf_token)
+ else:
+ tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = context_length)

  return model, tokenizer

  def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_config:dict=cpu_config, torch_device:str=torch_device):
  print("Loading model")

- if model_type == "Phi 3.5 Mini (larger, slow)":
+ if model_type == LARGE_MODEL_NAME:
  if torch_device == "cuda":
  gpu_config.update_gpu(gpu_layers)
  print("Loading with", gpu_config.n_gpu_layers, "model layers sent to GPU.")
@@ -113,33 +126,30 @@ def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_c

  print("Loading with", cpu_config.n_gpu_layers, "model layers sent to GPU.")

- print(vars(gpu_config))
- print(vars(cpu_config))
-
  try:
  model = Llama(
  model_path=hf_hub_download(
- repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3.5-mini-instruct-GGUF"),# "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
- filename=os.environ.get("MODEL_FILE", "Phi-3.5-mini-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
+ repo_id=LARGE_MODEL_REPO_ID,
+ filename=LARGE_MODEL_GGUF_FILE
  ),
  **vars(gpu_config) # change n_gpu_layers if you have more or less VRAM
  )

  except Exception as e:
- print("GPU load failed", e)
+ print("GPU load failed", e, "loading CPU version instead")
  model = Llama(
  model_path=hf_hub_download(
- repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3.5-mini-instruct-GGUF"), #"QuantFactory/Phi-3-mini-128k-instruct-GGUF"), #, "microsoft/Phi-3-mini-4k-instruct-gguf"),#"QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
- filename=os.environ.get("MODEL_FILE", "Phi-3.5-mini-instruct.Q4_K_M.gguf"), # "Phi-3-mini-128k-instruct.Q4_K_M.gguf") # , #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf"),#"mistral-7b-openorca.Q4_K_M.gguf"),
+ repo_id=LARGE_MODEL_REPO_ID,
+ filename=LARGE_MODEL_GGUF_FILE
  ),
  **vars(cpu_config)
  )

  tokenizer = []

- if model_type == "Qwen 2 0.5B (small, fast)":
+ if model_type == SMALL_MODEL_NAME:
  # Huggingface chat model
- hf_checkpoint = 'Qwen/Qwen2-0.5B-Instruct'# 'declare-lab/flan-alpaca-large'#'declare-lab/flan-alpaca-base' # # # 'Qwen/Qwen1.5-0.5B-Chat' #
+ hf_checkpoint = SMALL_MODEL_REPO_ID# 'declare-lab/flan-alpaca-large'#'declare-lab/flan-alpaca-base' # # # 'Qwen/Qwen1.5-0.5B-Chat' #

  model, tokenizer = create_hf_model(model_name = hf_checkpoint)

@@ -165,11 +175,9 @@ def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_c
  app = gr.Blocks(theme = gr.themes.Base(), fill_width=True)#css=".gradio-container {background-color: black}")

  with app:
- model_type = "Qwen 2 0.5B (small, fast)"
+ model_type = SMALL_MODEL_NAME
  load_model(model_type, 0, gpu_config, cpu_config, torch_device) # chatf.model_object, chatf.tokenizer, chatf.model_type =

- print("chatf.model_object:", chatf.model_object)
-
  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
  #model_type = "Phi 3.5 Mini (larger, slow)"
  #load_model(model_type, gpu_layers, gpu_config, cpu_config, torch_device)
@@ -182,11 +190,16 @@ with app:
  gpu_config_state = gr.State(gpu_config)
  cpu_config_state = gr.State(cpu_config)
  torch_device_state = gr.State(torch_device)
- embeddings_state = gr.State(chatf.embeddings)#globals()["embeddings"])
- vectorstore_state = gr.State(chatf.vectorstore)#globals()["vectorstore"])

+ # Embeddings related vars
+ embeddings_model_object_state = gr.State(embeddings_model)#globals()["embeddings"])
+ vectorstore_state = gr.State(vectorstore)#globals()["vectorstore"])
+ default_embeddings_store_text = gr.Textbox(value=DEFAULT_EMBEDDINGS_LOCATION, visible=False)
+
+ # Is the query relevant to the sources provided?
  relevant_query_state = gr.Checkbox(value=True, visible=False)

+ # Storing model objects in state doesn't seem to work, so we have to load in different models in roundabout ways
  model_state = gr.State() # chatf.model_object (gives error)
  tokenizer_state = gr.State() # chatf.tokenizer (gives error)

@@ -194,7 +207,8 @@ with app:
  instruction_prompt_out = gr.State()

  session_hash_state = gr.State()
- s3_output_folder_state = gr.State()
+ output_folder_textbox = gr.Textbox(value=OUTPUT_FOLDER, visible=False)
+ input_folder_textbox = gr.Textbox(value=INPUT_FOLDER, visible=False)

  session_hash_textbox = gr.Textbox(value="", visible=False)
  s3_logs_output_textbox = gr.Textbox(label="S3 logs", visible=False)
@@ -208,14 +222,11 @@ with app:

  gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")

- gr.Markdown("Chat with PDF, web page or (new) csv/Excel documents. The default is a small model (Qwen 2 0.5B), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative (Phi 3.5 Mini (larger, slow)), can reason a little better, but is much slower (See Advanced tab).\n\nBy default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
-
- with gr.Accordion(label="Use Gemini or AWS Claude model", open=False, visible=False):
- api_model_choice = gr.Dropdown(value = "None", choices = ["gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25", "anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0", "None"], label="LLM model to use", multiselect=False, interactive=True, visible=False)
- in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=False)
+ gr.Markdown(f"""Chat with PDF, web page or (new) csv/Excel documents. The default is a small model ({SMALL_MODEL_NAME}), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative ({LARGE_MODEL_NAME}) can reason a little better, but is much slower (see the Advanced settings tab).\n\nBy default '[{DEFAULT_DATA_SOURCE_NAME}]({DEFAULT_DATA_SOURCE})' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive in any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.""")
+

  with gr.Row():
- current_source = gr.Textbox(label="Current data source(s)", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf", scale = 10)
+ current_source = gr.Textbox(label="Current data source(s)", value=DEFAULT_DATA_SOURCE, scale = 10)
  current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)

  with gr.Tab("Chatbot"):
@@ -234,17 +245,12 @@ with app:
  with gr.Row():
  submit = gr.Button(value="Send message", variant="primary", scale = 4)
  clear = gr.Button(value="Clear chat", variant="secondary", scale=1)
- stop = gr.Button(value="Stop generating", variant="secondary", scale=1)
-
- examples_set = gr.Radio(label="Examples for the Lambeth Borough Plan",
- #value = "What were the five pillars of the previous borough plan?",
- choices=["What were the five pillars of the previous borough plan?",
- "What is the vision statement for Lambeth?",
- "What are the commitments for Lambeth?",
- "What are the 2030 outcomes for Lambeth?"])
-
- current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here", visible=False)
+ stop = gr.Button(value="Stop generating", variant="stop", scale=1)

+ examples_set = gr.Radio(label="Example questions",
+ choices=default_examples_set)
+
+ current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here", visible=False)

  with gr.Tab("Load in a different file to chat with"):
  with gr.Accordion("PDF file", open = False):
@@ -270,7 +276,8 @@ with app:
  out_passages = gr.Slider(minimum=1, value = 2, maximum=10, step=1, label="Choose number of passages to retrieve from the document. Numbers greater than 2 may lead to increased hallucinations or input text being truncated.")
  temp_slide = gr.Slider(minimum=0.1, value = 0.5, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
  with gr.Row():
- model_choice = gr.Radio(label="Choose a chat model", value="Qwen 2 0.5B (small, fast)", choices = ["Qwen 2 0.5B (small, fast)", "Phi 3.5 Mini (larger, slow)", "gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25", "anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"])
+ model_choice = gr.Radio(label="Choose a chat model", value=SMALL_MODEL_NAME, choices = default_model_choices)
+ in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=True)
  change_model_button = gr.Button(value="Load model", scale=0)
  with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
  gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
@@ -278,28 +285,27 @@ with app:
  load_text = gr.Text(label="Load status")

  gr.HTML(
- "<center>This app is based on the models Qwen 2 0.5B and Phi 3.5 Mini. It powered by Gradio, Transformers, and Llama.cpp.</a></center>"
+ "<center>This app is powered by Gradio, Transformers, and Llama.cpp.</center>"
  )

  examples_set.change(fn=chatf.update_message, inputs=[examples_set], outputs=[message])

-
  ###
  # CHAT PAGE
  ###

  # Click to send message
- response_click = submit.click(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state, out_passages, api_model_choice, in_api_key], outputs=[chat_history_state, sources, instruction_prompt_out, relevant_query_state], queue=False, api_name="retrieval").\
+ response_click = submit.click(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_model_object_state, model_type_state, out_passages, in_api_key], outputs=[chat_history_state, sources, instruction_prompt_out, relevant_query_state], queue=False, api_name="retrieval").\
  success(chatf.turn_off_interactivity, inputs=None, outputs=[message, submit], queue=False).\
- success(chatf.produce_streaming_answer_chatbot, inputs=[chatbot, instruction_prompt_out, model_type_state, temp_slide, relevant_query_state, chat_history_state], outputs=chatbot)
+ success(chatf.produce_streaming_answer_chatbot, inputs=[chatbot, instruction_prompt_out, model_type_state, temp_slide, relevant_query_state, chat_history_state, in_api_key], outputs=chatbot)
  response_click.success(chatf.highlight_found_text, [chatbot, sources], [sources]).\
  success(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
  success(lambda: chatf.restore_interactivity(), None, [message, submit], queue=False)

  # Press enter to send message
- response_enter = message.submit(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state, out_passages, api_model_choice, in_api_key], outputs=[chat_history_state, sources, instruction_prompt_out, relevant_query_state], queue=False).\
+ response_enter = message.submit(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_model_object_state, model_type_state, out_passages, in_api_key], outputs=[chat_history_state, sources, instruction_prompt_out, relevant_query_state], queue=False).\
  success(chatf.turn_off_interactivity, inputs=None, outputs=[message, submit], queue=False).\
- success(chatf.produce_streaming_answer_chatbot, [chatbot, instruction_prompt_out, model_type_state, temp_slide, relevant_query_state, chat_history_state], chatbot)
+ success(chatf.produce_streaming_answer_chatbot, [chatbot, instruction_prompt_out, model_type_state, temp_slide, relevant_query_state, chat_history_state, in_api_key], chatbot)
  response_enter.success(chatf.highlight_found_text, [chatbot, sources], [sources]).\
  success(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
  success(lambda: chatf.restore_interactivity(), None, [message, submit], queue=False)
@@ -322,19 +328,19 @@ with app:
  # Load in a pdf
  load_pdf_click = load_pdf.click(ing.parse_file, inputs=[in_pdf], outputs=[ingest_text, current_source]).\
  success(ing.text_to_docs, inputs=[ingest_text], outputs=[ingest_docs]).\
- success(embed_faiss_save_to_zip, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
+ success(embed_faiss_save_to_zip, inputs=[ingest_docs, output_folder_textbox, embeddings_model_object_state], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
  success(chatf.hide_block, outputs = [examples_set])

  # Load in a webpage
  load_web_click = load_web.click(ing.parse_html, inputs=[in_web, in_div], outputs=[ingest_text, ingest_metadata, current_source]).\
  success(ing.html_text_to_docs, inputs=[ingest_text, ingest_metadata], outputs=[ingest_docs]).\
- success(embed_faiss_save_to_zip, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
+ success(embed_faiss_save_to_zip, inputs=[ingest_docs, output_folder_textbox, embeddings_model_object_state], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
  success(chatf.hide_block, outputs = [examples_set])

  # Load in a csv/excel file
  load_csv_click = load_csv.click(ing.parse_csv_or_excel, inputs=[in_csv, in_text_column], outputs=[ingest_text, current_source]).\
  success(ing.csv_excel_text_to_docs, inputs=[ingest_text, in_text_column], outputs=[ingest_docs]).\
- success(embed_faiss_save_to_zip, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
+ success(embed_faiss_save_to_zip, inputs=[ingest_docs, output_folder_textbox, embeddings_model_object_state], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
  success(chatf.hide_block, outputs = [examples_set])


@@ -350,9 +356,11 @@ with app:

  ###
  # LOGGING AND ON APP LOAD FUNCTIONS
- ###
- app.load(get_connection_params, inputs=None, outputs=[session_hash_state, s3_output_folder_state, session_hash_textbox]).\
- success(load_model, inputs=[model_type_state, gpu_layer_choice, gpu_config_state, cpu_config_state, torch_device_state], outputs=[model_type_state, load_text, current_model])
+ ###
+ # Load in default model and embeddings for each user
+ app.load(get_connection_params, inputs=None, outputs=[session_hash_state, output_folder_textbox, session_hash_textbox, input_folder_textbox]).\
+ success(load_model, inputs=[model_type_state, gpu_layer_choice, gpu_config_state, cpu_config_state, torch_device_state], outputs=[model_type_state, load_text, current_model]).\
+ success(get_faiss_store, inputs=[default_embeddings_store_text, embeddings_model_object_state], outputs=[vectorstore_state])

  # Log usernames and times of access to file (to know who is using the app when running on AWS)
  access_callback = gr.CSVLogger()
@@ -362,7 +370,7 @@ with app:
  success(fn = upload_file_to_s3, inputs=[access_logs_state, access_s3_logs_loc_state], outputs=[s3_logs_output_textbox])

  if __name__ == "__main__":
- if os.environ['COGNITO_AUTH'] == "1":
- app.queue().launch(show_error=True, auth=authenticate_user, max_file_size='50mb')
+ if COGNITO_AUTH == "1":
+ app.queue(max_size=int(MAX_QUEUE_SIZE), default_concurrency_limit=int(DEFAULT_CONCURRENCY_LIMIT)).launch(show_error=True, inbrowser=True, auth=authenticate_user, max_file_size=MAX_FILE_SIZE, server_port=GRADIO_SERVER_PORT, root_path=ROOT_PATH)
  else:
- app.queue().launch(show_error=True, inbrowser=True, max_file_size='50mb')
+ app.queue(max_size=int(MAX_QUEUE_SIZE), default_concurrency_limit=int(DEFAULT_CONCURRENCY_LIMIT)).launch(show_error=True, inbrowser=True, max_file_size=MAX_FILE_SIZE, server_port=GRADIO_SERVER_PORT, root_path=ROOT_PATH)
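Note on the new list-valued settings: app.py now turns DEFAULT_EXAMPLES and DEFAULT_MODEL_CHOICES from strings into Python lists with eval(). The minimal sketch below shows the same parsing step using ast.literal_eval, which is a functionally equivalent but safer alternative for strings that are plain list literals; the swap is an editorial suggestion, not part of the commit, and the sample value simply mirrors the default in chatfuncs/config.py.

# Sketch only: parse a list-valued config string without eval().
import ast

DEFAULT_EXAMPLES = '["How can I make a custom deny list?", "How can I find page duplicates?"]'

# literal_eval only accepts Python literals, so arbitrary code in the string raises ValueError.
default_examples_set = ast.literal_eval(DEFAULT_EXAMPLES)
assert isinstance(default_examples_set, list)
print(default_examples_set[0])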
chatfuncs/chatfuncs.py CHANGED
@@ -5,21 +5,26 @@ from typing import Type, Dict, List, Tuple
  import time
  from itertools import compress
  import pandas as pd
- import numpy as np
  import google.generativeai as ai
+ import gradio as gr
  from gradio import Progress
  import boto3
  import json
+ from nltk.corpus import stopwords
+ from nltk.tokenize import RegexpTokenizer
+ from nltk.stem import WordNetLemmatizer
+ from keybert import KeyBERT

+ # For Name Entity Recognition model
+ #from span_marker import SpanMarkerModel # Not currently used
+
+ # For BM25 retrieval
+ import bm25s
+ import Stemmer
  # Model packages
  import torch.cuda
  from threading import Thread
  from transformers import pipeline, TextIteratorStreamer
- from langchain_huggingface import HuggingFaceEmbeddings
-
- # Alternative model sources
- #from dataclasses import asdict, dataclass
-
  # Langchain functions
  from langchain.prompts import PromptTemplate
  from langchain_community.vectorstores import FAISS
@@ -27,13 +32,13 @@ from langchain_community.retrievers import SVMRetriever
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.docstore.document import Document

- from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION
+ from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma
+ from chatfuncs.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
+ from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME

  model_object = [] # Define empty list for model functions to run
  tokenizer = [] # Define empty list for model functions to run

- from chatfuncs.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
-
  # ResponseObject class for AWS Bedrock calls
  class ResponseObject:
  def __init__(self, text, usage_metadata):
@@ -42,30 +47,12 @@ class ResponseObject:

  bedrock_runtime = boto3.client('bedrock-runtime', region_name=AWS_DEFAULT_REGION)

- # For keyword extraction (not currently used)
- #import nltk
- #nltk.download('wordnet')
- from nltk.corpus import stopwords
- from nltk.tokenize import RegexpTokenizer
- from nltk.stem import WordNetLemmatizer
- from keybert import KeyBERT
-
- # For Name Entity Recognition model
- #from span_marker import SpanMarkerModel # Not currently used
-
- # For BM25 retrieval
- import bm25s
- import Stemmer
-
- from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca
-
- import gradio as gr
-
  torch.cuda.empty_cache()

  PandasDataFrame = Type[pd.DataFrame]

  embeddings = None # global variable setup
+ embeddings_model = None # global variable setup
  vectorstore = None # global variable setup
  model_type = None # global variable setup

@@ -73,7 +60,6 @@ max_memory_length = 0 # How long should the memory of the conversation last?

  source_texts = "" # Define dummy source text (full text) just to enable highlight function to load

-
  ## Highlight text constants
  hlt_chunk_size = 12
  hlt_strat = [" ", ". ", "! ", "? ", ": ", "\n\n", "\n", ", "]
@@ -88,37 +74,51 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL

  # Vectorstore funcs

- def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
+ # def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):

- print(f"> Total split documents: {len(docs_out)}")
+ # print(f"> Total split documents: {len(docs_out)}")

- vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
+ # vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)

- '''
- #with open("vectorstore.pkl", "wb") as f:
- #pickle.dump(vectorstore, f)
- '''
+ # '''
+ # #with open("vectorstore.pkl", "wb") as f:
+ # #pickle.dump(vectorstore, f)
+ # '''
+
+ # #if Path(save_to).exists():
+ # # vectorstore_func.save_local(folder_path=save_to)
+ # #else:
+ # # os.mkdir(save_to)
+ # # vectorstore_func.save_local(folder_path=save_to)
+
+ # global vectorstore
+
+ # vectorstore = vectorstore_func
+
+ # out_message = "Document processing complete"
+
+ # #print(out_message)
+ # #print(f"> Saved to: {save_to}")
+
+ # return out_message

- #if Path(save_to).exists():
- # vectorstore_func.save_local(folder_path=save_to)
- #else:
- # os.mkdir(save_to)
- # vectorstore_func.save_local(folder_path=save_to)
+ # def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings_model=embeddings_model):

- global vectorstore
+ # print(f"> Total split documents: {len(docs_out)}")

- vectorstore = vectorstore_func
+ # print(docs_out)

- out_message = "Document processing complete"
+ # vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings_model)

- #print(out_message)
- #print(f"> Saved to: {save_to}")
+ # vectorstore = vectorstore_func

- return out_message
+ # out_message = "Document processing complete"
+
+ # return out_message, vectorstore_func

  # Prompt functions

- def base_prompt_templates(model_type:str = "Qwen 2 0.5B (small, fast)"):
+ def base_prompt_templates(model_type:str = SMALL_MODEL_NAME):

  #EXAMPLE_PROMPT = PromptTemplate(
  # template="\nCONTENT:\n\n{page_content}\n\nSOURCE: {source}\n\n",
@@ -132,9 +132,9 @@ def base_prompt_templates(model_type:str = "Qwen 2 0.5B (small, fast)"):

  # The main prompt:

- if model_type == "Qwen 2 0.5B (small, fast)":
+ if model_type == SMALL_MODEL_NAME:
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_qwen, input_variables=['question', 'summaries'])
- elif model_type == "Phi 3.5 Mini (larger, slow)":
+ elif model_type == LARGE_MODEL_NAME:
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_phi3, input_variables=['question', 'summaries'])
  else:
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_orca, input_variables=['question', 'summaries'])
@@ -146,7 +146,7 @@ def write_out_metadata_as_string(metadata_in:str):
  metadata_string = [f"{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}" for d in metadata_in] # ['metadata']
  return metadata_string

- def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, content_prompt:str, extracted_memory:list, vectorstore:object, embeddings:object, relevant_flag:bool = True, out_passages:int = 2): # ,
+ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, content_prompt:str, extracted_memory:list, vectorstore:object, embeddings:object, relevant_flag:bool = True, out_passages:int = 2, total_output_passage_chunks_size:int=5): # ,

  question = inputs["question"]
  chat_history = inputs["chat_history"]
@@ -172,7 +172,7 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, con

  # Only expand passages if not tabular data
  if (file_type != ".csv") & (file_type != ".xlsx"):
- docs_keep_as_doc, doc_df = get_expanded_passages(vectorstore, docs_keep_out, width=3)
+ docs_keep_as_doc, doc_df = get_expanded_passages(vectorstore, docs_keep_out, width=total_output_passage_chunks_size)

  # Build up sources content to add to user display
  doc_df['meta_clean'] = write_out_metadata_as_string(doc_df["metadata"]) # [f"<b>{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}</b>" for d in doc_df['metadata']]
@@ -188,9 +188,6 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, con
  sources_docs_content_string = '<br><br>'.join(doc_df['content_meta'])#.replace("  "," ")#.strip()

  instruction_prompt_out = instruction_prompt.format(question=new_question_kworded, summaries=docs_content_string)
-
- print('Final prompt is: ')
- print(instruction_prompt_out)

  return instruction_prompt_out, sources_docs_content_string, new_question_kworded

@@ -201,9 +198,11 @@ def create_full_prompt(user_input:str,
  embeddings:object,
  model_type:str,
  out_passages:list[str],
- api_model_choice=None,
- api_key=None,
- relevant_flag = True):
+ api_key:str="",
+ relevant_flag:bool=True):
+
+ if "gemini" in model_type and not GEMINI_API_KEY and not api_key:
+ raise Exception("Gemini model selected but no API key found. Please enter an API key on the Advanced settings page.")

  #if chain_agent is None:
  # history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
@@ -211,14 +210,6 @@ def create_full_prompt(user_input:str,
  print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")

  history = history or []
-
- if api_model_choice and api_model_choice != "None":
- print("API model choice detected")
- if api_key:
- print("API key detected")
- return history, "", None, relevant_flag
- else:
- return history, "", None, relevant_flag

  # Create instruction prompt
  instruction_prompt, content_prompt = base_prompt_templates(model_type=model_type)
@@ -228,17 +219,12 @@ def create_full_prompt(user_input:str,
  relevant_flag = False
  else:
  relevant_flag = True
-
- print("User input:", user_input)

  instruction_prompt_out, docs_content_string, new_question_kworded =\
  generate_expanded_prompt({"question": user_input, "chat_history": history}, #vectorstore,
  instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings, relevant_flag, out_passages)

  history.append({"metadata":None, "options":None, "role": 'user', "content": user_input})
-
- print("Output history is:", history)
- print("Final prompt to model is:",instruction_prompt_out)

  return history, docs_content_string, instruction_prompt_out, relevant_flag

@@ -457,13 +443,13 @@ def produce_streaming_answer_chatbot(
  temperature:float=temperature,
  relevant_query_bool:bool=True,
  chat_history:list[dict]=[{"metadata":None, "options":None, "role": 'user', "content": ""}],
+ in_api_key:str=GEMINI_API_KEY,
  max_new_tokens:int=max_new_tokens,
  sample:bool=sample,
  repetition_penalty:float=repetition_penalty,
  top_p:float=top_p,
  top_k:float=top_k,
- max_tokens:int=max_tokens,
- in_api_key:str=GEMINI_API_KEY
+ max_tokens:int=max_tokens
  ):
  #print("Model type is: ", model_type)

@@ -483,9 +469,8 @@ def produce_streaming_answer_chatbot(
  yield history
  return

- if model_type == "Qwen 2 0.5B (small, fast)":
+ if model_type == SMALL_MODEL_NAME:

- print("tokenizer:", tokenizer)
  # Get the model and tokenizer, and tokenize the user text.
  model_inputs = tokenizer(text=full_prompt, return_tensors="pt", return_attention_mask=False).to(torch_device)

@@ -503,8 +488,6 @@ def produce_streaming_answer_chatbot(
  top_k=top_k
  )

- print("model_object:", model_object)
-
  t = Thread(target=model_object.generate, kwargs=generate_kwargs)
  t.start()

@@ -521,6 +504,7 @@ def produce_streaming_answer_chatbot(
  new_text = ""
  history[-1]['content'] += new_text
  NUM_TOKENS += 1
+ history[-1]['content'] = history[-1]['content'].replace('<|im_end|>','')
  yield history
  except Exception as e:
  print(f"Error during text generation: {e}")
@@ -533,7 +517,7 @@ def produce_streaming_answer_chatbot(
  print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
  print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')

- elif model_type == "Phi 3.5 Mini (larger, slow)":
+ elif model_type == LARGE_MODEL_NAME:
  #tokens = model.tokenize(full_prompt)

  gen_config = CtransGenGenerationConfig()
@@ -556,6 +540,7 @@ def produce_streaming_answer_chatbot(
  if "choices" in out and len(out["choices"]) > 0 and "text" in out["choices"][0]:
  history[-1]['content'] += out["choices"][0]["text"]
  NUM_TOKENS+=1
+ history[-1]['content'] = history[-1]['content'].replace('<|im_end|>','')
  yield history
  else:
  print(f"Unexpected output structure: {out}")
@@ -602,6 +587,11 @@ def produce_streaming_answer_chatbot(
  yield history

  elif "gemini" in model_type:
+
+ if in_api_key: gemini_api_key = in_api_key
+ elif GEMINI_API_KEY: gemini_api_key = GEMINI_API_KEY
+ else: raise Exception("Gemini API key not found. Please enter a key on the Advanced settings page or select another model type")
+
  print("Using Gemini model:", model_type)
  print("full_prompt:", full_prompt)

@@ -610,7 +600,7 @@ def produce_streaming_answer_chatbot(

  system_prompt = "You are answering questions from the user based on source material. Respond with short, factually correct answers."

- model, config = construct_gemini_generative_model(GEMINI_API_KEY, temperature, model_type, system_prompt, max_tokens)
+ model, config = construct_gemini_generative_model(gemini_api_key, temperature, model_type, system_prompt, max_tokens)

  responses, summary_conversation_history, whole_summary_conversation, whole_conversation_metadata = process_requests(full_prompt, system_prompt, conversation_history=[], whole_conversation=[], whole_conversation_metadata=[], model=model, config = config, model_choice = model_type, temperature = temperature)

@@ -977,13 +967,9 @@ def highlight_found_text(chat_history: list[dict], source_texts: list[dict], hlt
  response_text = next(
  (entry['content'] for entry in reversed(chat_history) if entry.get('role') == 'assistant'),
  "")
-
- print("response_text:", response_text)

  source_texts = extract_text_from_input(source_texts)

- print("source_texts:", source_texts)
-
  text_splitter = RecursiveCharacterTextSplitter(
  chunk_size=hlt_chunk_size,
  separators=hlt_strat,
@@ -1025,8 +1011,6 @@ def highlight_found_text(chat_history: list[dict], source_texts: list[dict], hlt

  out_pos_tokens = "".join(pos_tokens)

- print("out_pos_tokens:", out_pos_tokens)
-
  return out_pos_tokens

chatfuncs/config.py CHANGED
@@ -165,6 +165,8 @@ DISPLAY_FILE_NAMES_IN_LOGS = get_or_create_env_var('DISPLAY_FILE_NAMES_IN_LOGS',
165
  # RUN CONFIG
166
  GEMINI_API_KEY = get_or_create_env_var('GEMINI_API_KEY', '')
167
 
 
 
168
 
169
  # Number of pages to loop through before breaking the function and restarting from the last finished page (not currently activated).
170
  PAGE_BREAK_VALUE = get_or_create_env_var('PAGE_BREAK_VALUE', '99999')
@@ -175,6 +177,37 @@ MAX_TIME_VALUE = get_or_create_env_var('MAX_TIME_VALUE', '999999')
175
  # APP RUN CONFIG
176
  ###
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  # Get some environment variables and Launch the Gradio app
179
  COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')
180
 
 
165
  # RUN CONFIG
166
  GEMINI_API_KEY = get_or_create_env_var('GEMINI_API_KEY', '')
167
 
168
+ HF_TOKEN = get_or_create_env_var('HF_TOKEN', '')
169
+
170
 
171
  # Number of pages to loop through before breaking the function and restarting from the last finished page (not currently activated).
172
  PAGE_BREAK_VALUE = get_or_create_env_var('PAGE_BREAK_VALUE', '99999')
 
177
  # APP RUN CONFIG
178
  ###
179
 
180
+ SMALL_MODEL_NAME = get_or_create_env_var("SMALL_MODEL_NAME", "Gemma 3 1B (small, fast)") # "Qwen 2 0.5B (small, fast)"
181
+
182
+ SMALL_MODEL_REPO_ID = get_or_create_env_var("SMALL_MODEL_REPO_ID", 'google/gemma-3-1b-it') #'Qwen/Qwen2-0.5B-Instruct')
183
+
184
+ LARGE_MODEL_NAME = get_or_create_env_var("LARGE_MODEL_NAME", "Phi 3.5 Mini (larger, slow)")
185
+
186
+ LARGE_MODEL_REPO_ID = get_or_create_env_var("LARGE_MODEL_REPO_ID", "QuantFactory/Phi-3.5-mini-instruct-GGUF") # "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
187
+ LARGE_MODEL_GGUF_FILE = get_or_create_env_var("LARGE_MODEL_GGUF_FILE", "Phi-3.5-mini-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
188
+
189
+ if RUN_AWS_FUNCTIONS == "1":
190
+ default_model_choices = f'["{SMALL_MODEL_NAME}", "{LARGE_MODEL_NAME}", "gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25", "anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"]'
191
+ else:
192
+ default_model_choices = f'["{SMALL_MODEL_NAME}", "{LARGE_MODEL_NAME}", "gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]'
193
+
194
+ DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", default_model_choices)
195
+
196
+ EMBEDDINGS_MODEL_NAME = get_or_create_env_var('EMBEDDINGS_MODEL_NAME', "BAAI/bge-base-en-v1.5") #"mixedbread-ai/mxbai-embed-xsmall-v1"
197
+
198
+ DEFAULT_EMBEDDINGS_LOCATION = get_or_create_env_var('DEFAULT_EMBEDDINGS_LOCATION', "faiss_embedding")
199
+
200
+ DEFAULT_DATA_SOURCE_NAME = get_or_create_env_var('DEFAULT_DATA_SOURCE_NAME', "Document redaction app documentation")
201
+
202
+ DEFAULT_DATA_SOURCE = get_or_create_env_var('DEFAULT_DATA_SOURCE', "https://seanpedrick-case.github.io/doc_redaction/README.html")
203
+
204
+ DEFAULT_EXAMPLES = get_or_create_env_var('DEFAULT_EXAMPLES', '[ "How can I make a custom deny list?", "How can I find page duplicates?", "How can I review and modify existing redactions?", "How can I export my review files to Adobe?"]')
205
+ #
206
+ # ') # ["What were the five pillars of the previous borough plan?",
207
+ #"What is the vision statement for Lambeth?",
208
+ #"What are the commitments for Lambeth?",
209
+ #"What are the 2030 outcomes for Lambeth?"]
210
+
211
  # Get some environment variables and Launch the Gradio app
212
  COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')
213
 
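The new model settings are stored as plain strings so they can be overridden through single environment variables. A hedged sketch of how they might be consumed downstream — parsing DEFAULT_MODEL_CHOICES back into a list and pulling the large GGUF file for llama-cpp-python — assuming only the names added in this commit:

```python
import ast
from huggingface_hub import hf_hub_download
from chatfuncs.config import (DEFAULT_MODEL_CHOICES, LARGE_MODEL_REPO_ID,
                              LARGE_MODEL_GGUF_FILE, HF_TOKEN)

# DEFAULT_MODEL_CHOICES is a JSON-style string such as
# '["Gemma 3 1B (small, fast)", "Phi 3.5 Mini (larger, slow)", ...]'
model_choices = ast.literal_eval(DEFAULT_MODEL_CHOICES)
print(model_choices)

# The large local model ships as a GGUF file; HF_TOKEN is only needed
# for gated repositories, so fall back to anonymous access when unset.
gguf_path = hf_hub_download(
    repo_id=LARGE_MODEL_REPO_ID,
    filename=LARGE_MODEL_GGUF_FILE,
    token=HF_TOKEN or None,
)
print(gguf_path)
```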
chatfuncs/helper_functions.py CHANGED
@@ -1,6 +1,9 @@
1
  import os
2
  import gradio as gr
3
  import pandas as pd
 
 
 
4
 
5
  def get_or_create_env_var(var_name, default_value):
6
  # Get the environment variable if it exists
@@ -13,12 +16,6 @@ def get_or_create_env_var(var_name, default_value):
13
 
14
  return value
15
 
16
- # Retrieving or setting output folder
17
- env_var_name = 'GRADIO_OUTPUT_FOLDER'
18
- default_value = 'output/'
19
-
20
- output_folder = get_or_create_env_var(env_var_name, default_value)
21
- print(f'The value of {env_var_name} is {output_folder}')
22
 
23
  def get_file_path_with_extension(file_path):
24
  # First, get the basename of the file (e.g., "example.txt" from "/path/to/example.txt")
@@ -165,64 +162,129 @@ def wipe_logs(feedback_logs_loc, usage_logs_loc):
165
 
166
 
167
 
168
- async def get_connection_params(request: gr.Request):
169
- base_folder = ""
170
-
171
- if request:
172
- #print("request user:", request.username)
173
-
174
- #request_data = await request.json() # Parse JSON body
175
- #print("All request data:", request_data)
176
- #context_value = request_data.get('context')
177
- #if 'context' in request_data:
178
- # print("Request context dictionary:", request_data['context'])
179
-
180
- # print("Request headers dictionary:", request.headers)
181
- # print("All host elements", request.client)
182
- # print("IP address:", request.client.host)
183
- # print("Query parameters:", dict(request.query_params))
184
- # To get the underlying FastAPI items you would need to use await and some fancy @ stuff for a live query: https://fastapi.tiangolo.com/vi/reference/request/
185
- #print("Request dictionary to object:", request.request.body())
186
- print("Session hash:", request.session_hash)
187
-
188
- # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER
189
- CUSTOM_CLOUDFRONT_HEADER_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER', '')
190
- #print(f'The value of CUSTOM_CLOUDFRONT_HEADER is {CUSTOM_CLOUDFRONT_HEADER_var}')
191
-
192
- # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER_VALUE
193
- CUSTOM_CLOUDFRONT_HEADER_VALUE_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER_VALUE', '')
194
- #print(f'The value of CUSTOM_CLOUDFRONT_HEADER_VALUE_var is {CUSTOM_CLOUDFRONT_HEADER_VALUE_var}')
195
-
196
- if CUSTOM_CLOUDFRONT_HEADER_var and CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
197
- if CUSTOM_CLOUDFRONT_HEADER_var in request.headers:
198
- supplied_cloudfront_custom_value = request.headers[CUSTOM_CLOUDFRONT_HEADER_var]
199
- if supplied_cloudfront_custom_value == CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
200
- print("Custom Cloudfront header found:", supplied_cloudfront_custom_value)
201
  else:
202
- raise(ValueError, "Custom Cloudfront header value does not match expected value.")
203
 
204
- # Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
205
-
206
- if request.username:
207
- out_session_hash = request.username
208
- base_folder = "user-files/"
209
- print("Request username found:", out_session_hash)
210
 
211
- elif 'x-cognito-id' in request.headers:
212
- out_session_hash = request.headers['x-cognito-id']
213
- base_folder = "user-files/"
214
- print("Cognito ID found:", out_session_hash)
215
 
216
- else:
217
- out_session_hash = request.session_hash
218
- base_folder = "temp-files/"
219
- # print("Cognito ID not found. Using session hash as save folder:", out_session_hash)
220
 
221
- output_folder = base_folder + out_session_hash + "/"
222
- #if bucket_name:
223
- # print("S3 output folder is: " + "s3://" + bucket_name + "/" + output_folder)
224
 
225
- return out_session_hash, output_folder, out_session_hash
226
- else:
227
- print("No session parameters found.")
228
- return "",""
 
1
  import os
2
  import gradio as gr
3
  import pandas as pd
4
+ import boto3
5
+ from botocore.exceptions import ClientError
6
+ from chatfuncs.config import CUSTOM_HEADER_VALUE, CUSTOM_HEADER, OUTPUT_FOLDER, INPUT_FOLDER, SESSION_OUTPUT_FOLDER, AWS_USER_POOL_ID
7
 
8
  def get_or_create_env_var(var_name, default_value):
9
  # Get the environment variable if it exists
 
16
 
17
  return value
18
 
19
 
20
  def get_file_path_with_extension(file_path):
21
  # First, get the basename of the file (e.g., "example.txt" from "/path/to/example.txt")
 
162
 
163
 
164
 
165
+ # async def get_connection_params(request: gr.Request):
166
+ # base_folder = ""
167
+
168
+ # if request:
169
+ # #print("request user:", request.username)
170
+
171
+ # #request_data = await request.json() # Parse JSON body
172
+ # #print("All request data:", request_data)
173
+ # #context_value = request_data.get('context')
174
+ # #if 'context' in request_data:
175
+ # # print("Request context dictionary:", request_data['context'])
176
+
177
+ # # print("Request headers dictionary:", request.headers)
178
+ # # print("All host elements", request.client)
179
+ # # print("IP address:", request.client.host)
180
+ # # print("Query parameters:", dict(request.query_params))
181
+ # # To get the underlying FastAPI items you would need to use await and some fancy @ stuff for a live query: https://fastapi.tiangolo.com/vi/reference/request/
182
+ # #print("Request dictionary to object:", request.request.body())
183
+ # print("Session hash:", request.session_hash)
184
+
185
+ # # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER
186
+ # CUSTOM_CLOUDFRONT_HEADER_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER', '')
187
+ # #print(f'The value of CUSTOM_CLOUDFRONT_HEADER is {CUSTOM_CLOUDFRONT_HEADER_var}')
188
+
189
+ # # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER_VALUE
190
+ # CUSTOM_CLOUDFRONT_HEADER_VALUE_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER_VALUE', '')
191
+ # #print(f'The value of CUSTOM_CLOUDFRONT_HEADER_VALUE_var is {CUSTOM_CLOUDFRONT_HEADER_VALUE_var}')
192
+
193
+ # if CUSTOM_CLOUDFRONT_HEADER_var and CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
194
+ # if CUSTOM_CLOUDFRONT_HEADER_var in request.headers:
195
+ # supplied_cloudfront_custom_value = request.headers[CUSTOM_CLOUDFRONT_HEADER_var]
196
+ # if supplied_cloudfront_custom_value == CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
197
+ # print("Custom Cloudfront header found:", supplied_cloudfront_custom_value)
198
+ # else:
199
+ # raise(ValueError, "Custom Cloudfront header value does not match expected value.")
200
+
201
+ # # Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
202
+
203
+ # if request.username:
204
+ # out_session_hash = request.username
205
+ # base_folder = "user-files/"
206
+ # print("Request username found:", out_session_hash)
207
+
208
+ # elif 'x-cognito-id' in request.headers:
209
+ # out_session_hash = request.headers['x-cognito-id']
210
+ # base_folder = "user-files/"
211
+ # print("Cognito ID found:", out_session_hash)
212
+
213
+ # else:
214
+ # out_session_hash = request.session_hash
215
+ # base_folder = "temp-files/"
216
+ # # print("Cognito ID not found. Using session hash as save folder:", out_session_hash)
217
+
218
+ # output_folder = base_folder + out_session_hash + "/"
219
+ # #if bucket_name:
220
+ # # print("S3 output folder is: " + "s3://" + bucket_name + "/" + output_folder)
221
+
222
+ # return out_session_hash, output_folder, out_session_hash
223
+ # else:
224
+ # print("No session parameters found.")
225
+ # return "",""
226
+
227
+ async def get_connection_params(request: gr.Request,
228
+ output_folder_textbox:str=OUTPUT_FOLDER,
229
+ input_folder_textbox:str=INPUT_FOLDER,
230
+ session_output_folder:str=SESSION_OUTPUT_FOLDER):
231
+
232
+ #print("Session hash:", request.session_hash)
233
+
234
+ if CUSTOM_HEADER and CUSTOM_HEADER_VALUE:
235
+ if CUSTOM_HEADER in request.headers:
236
+ supplied_custom_header_value = request.headers[CUSTOM_HEADER]
237
+ if supplied_custom_header_value == CUSTOM_HEADER_VALUE:
238
+ print("Custom header supplied and matches CUSTOM_HEADER_VALUE")
239
  else:
240
+ print("Custom header value does not match expected value.")
241
+ raise ValueError("Custom header value does not match expected value.")
242
+ else:
243
+ print("Custom header value not found.")
244
+ raise ValueError("Custom header value not found.")
245
+
246
+ # Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
247
+
248
+ if request.username:
249
+ out_session_hash = request.username
250
+ #print("Request username found:", out_session_hash)
251
+
252
+ elif 'x-cognito-id' in request.headers:
253
+ out_session_hash = request.headers['x-cognito-id']
254
+ #print("Cognito ID found:", out_session_hash)
255
+
256
+ elif 'x-amzn-oidc-identity' in request.headers:
257
+ out_session_hash = request.headers['x-amzn-oidc-identity']
258
+
259
+ # Fetch email address using Cognito client
260
+ cognito_client = boto3.client('cognito-idp')
261
+ try:
262
+ response = cognito_client.admin_get_user(
263
+ UserPoolId=AWS_USER_POOL_ID, # Replace with your User Pool ID
264
+ Username=out_session_hash
265
+ )
266
+ email = next(attr['Value'] for attr in response['UserAttributes'] if attr['Name'] == 'email')
267
+ #print("Email address found:", email)
268
+
269
+ out_session_hash = email
270
+ except ClientError as e:
271
+ print("Error fetching user details:", e)
272
+ email = None
273
+
274
+ print("Cognito ID found:", out_session_hash)
275
 
276
+ else:
277
+ out_session_hash = request.session_hash
 
 
 
 
278
 
279
+ if session_output_folder == 'True':
280
+ output_folder = output_folder_textbox + out_session_hash + "/"
281
+ input_folder = input_folder_textbox + out_session_hash + "/"
 
282
 
283
+ else:
284
+ output_folder = output_folder_textbox
285
+ input_folder = input_folder_textbox
 
286
 
287
+ if not os.path.exists(output_folder): os.mkdir(output_folder)
288
+ if not os.path.exists(input_folder): os.mkdir(input_folder)
 
289
 
290
+ return out_session_hash, output_folder, out_session_hash, input_folder
 
 
 
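The rewritten get_connection_params() now returns a session identifier plus per-session output and input folders, driven by the new config flags. A sketch of how it might be wired into a Gradio Blocks app via demo.load; the component names below are illustrative, not the app's real ones:

```python
import gradio as gr
from chatfuncs.helper_functions import get_connection_params

with gr.Blocks() as demo:
    session_hash_state = gr.State()
    output_folder_state = gr.State("output/")
    input_folder_state = gr.State("input/")
    session_hash_textbox = gr.Textbox(visible=False)

    # gr.Request is injected automatically because the function's first
    # parameter is annotated as gr.Request; the remaining inputs map to
    # output_folder_textbox and input_folder_textbox, while
    # session_output_folder falls back to its default.
    demo.load(
        get_connection_params,
        inputs=[output_folder_state, input_folder_state],
        outputs=[session_hash_state, output_folder_state,
                 session_hash_textbox, input_folder_state],
    )
```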
chatfuncs/ingest.py CHANGED
@@ -7,13 +7,14 @@ import requests
7
  import pandas as pd
8
  import dateutil.parser
9
  from typing import Type, List
10
- import shutil
11
 
12
- from langchain_community.embeddings import HuggingFaceEmbeddings # HuggingFaceInstructEmbeddings,
13
  from langchain_community.vectorstores.faiss import FAISS
14
  #from langchain_community.vectorstores import Chroma
15
  from langchain.text_splitter import RecursiveCharacterTextSplitter
16
  from langchain.docstore.document import Document
 
17
 
18
  from bs4 import BeautifulSoup
19
  from docx import Document as Doc
@@ -557,31 +558,24 @@ def docs_elements_from_csv_save(docs_path="documents.csv"):
557
 
558
  # ## Create embeddings and save faiss vector store to the path specified in `save_to`
559
 
560
- def load_embeddings(model_name = "BAAI/bge-base-en-v1.5"):
561
 
562
- #if model_name == "hkunlp/instructor-large":
563
- # embeddings_func = HuggingFaceInstructEmbeddings(model_name=model_name,
564
- # embed_instruction="Represent the paragraph for retrieval: ",
565
- # query_instruction="Represent the question for retrieving supporting documents: "
566
- # )
567
 
568
- #else:
569
- embeddings_func = HuggingFaceEmbeddings(model_name=model_name)
570
 
571
- global embeddings
572
 
573
- embeddings = embeddings_func
574
 
575
- return embeddings_func
576
-
577
- def embed_faiss_save_to_zip(docs_out, save_to="output", model_name="BAAI/bge-base-en-v1.5"):
578
- load_embeddings(model_name=model_name)
579
 
580
  print(f"> Total split documents: {len(docs_out)}")
581
 
582
- vectorstore = FAISS.from_documents(documents=docs_out, embedding=embeddings)
583
 
584
- save_to_path = Path(save_to)
585
  save_to_path.mkdir(parents=True, exist_ok=True)
586
 
587
  vectorstore.save_local(folder_path=str(save_to_path))
@@ -619,20 +613,20 @@ def embed_faiss_save_to_zip(docs_out, save_to="output", model_name="BAAI/bge-bas
619
 
620
 
621
 
622
- def sim_search_local_saved_vec(query, k_val, save_to="faiss_lambeth_census_embedding"):
623
 
624
- load_embeddings()
625
 
626
- docsearch = FAISS.load_local(folder_path=save_to, embeddings=embeddings)
627
 
628
 
629
- display(Markdown(question))
630
 
631
- search = docsearch.similarity_search_with_score(query, k=k_val)
632
 
633
- for item in search:
634
- print(item[0].page_content)
635
- print(f"Page: {item[0].metadata['source']}")
636
- print(f"Date: {item[0].metadata['date']}")
637
- print(f"Score: {item[1]}")
638
- print("---")
 
7
  import pandas as pd
8
  import dateutil.parser
9
  from typing import Type, List
10
+ #import shutil
11
 
12
+ #from langchain_community.embeddings import HuggingFaceEmbeddings # HuggingFaceInstructEmbeddings,
13
  from langchain_community.vectorstores.faiss import FAISS
14
  #from langchain_community.vectorstores import Chroma
15
  from langchain.text_splitter import RecursiveCharacterTextSplitter
16
  from langchain.docstore.document import Document
17
+ #from chatfuncs.config import EMBEDDINGS_MODEL_NAME
18
 
19
  from bs4 import BeautifulSoup
20
  from docx import Document as Doc
 
558
 
559
  # ## Create embeddings and save faiss vector store to the path specified in `save_to`
560
 
561
+ # def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):
562
 
563
+ # embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)
 
 
 
 
564
 
565
+ # #global embeddings
 
566
 
567
+ # #embeddings = embeddings_func
568
 
569
+ # return embeddings_func
570
 
571
+ def embed_faiss_save_to_zip(docs_out, save_folder, embeddings_model_object, save_to="faiss_embeddings", model_name="BAAI/bge-base-en-v1.5"):
572
+ #load_embeddings(model_name=model_name)
 
 
573
 
574
  print(f"> Total split documents: {len(docs_out)}")
575
 
576
+ vectorstore = FAISS.from_documents(documents=docs_out, embedding=embeddings_model_object)
577
 
578
+ save_to_path = Path(save_folder, save_to)
579
  save_to_path.mkdir(parents=True, exist_ok=True)
580
 
581
  vectorstore.save_local(folder_path=str(save_to_path))
 
613
 
614
 
615
 
616
+ # def sim_search_local_saved_vec(query, k_val, save_to="faiss_lambeth_census_embedding"):
617
 
618
+ # load_embeddings()
619
 
620
+ # docsearch = FAISS.load_local(folder_path=save_to, embeddings=embeddings)
621
 
622
 
623
+ # display(Markdown(question))
624
 
625
+ # search = docsearch.similarity_search_with_score(query, k=k_val)
626
 
627
+ # for item in search:
628
+ # print(item[0].page_content)
629
+ # print(f"Page: {item[0].metadata['source']}")
630
+ # print(f"Date: {item[0].metadata['date']}")
631
+ # print(f"Score: {item[1]}")
632
+ # print("---")
chatfuncs/prompts.py CHANGED
@@ -71,4 +71,9 @@ Answer the QUESTION using information from the following CONTENT. Respond with s
71
  CONTENT: {summaries}
72
  QUESTION: {question}\n
73
  Answer:<|im_end|>
74
- <|im_start|>assistant\n"""
71
  CONTENT: {summaries}
72
  QUESTION: {question}\n
73
  Answer:<|im_end|>
74
+ <|im_start|>assistant\n"""
75
+
76
+ instruction_prompt_gemma = """Answer the QUESTION using information from the following CONTENT. Respond with short answers that directly answer the question.
77
+ CONTENT: {summaries}
78
+ QUESTION: {question}
79
+ assistant:"""
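The new Gemma instruction prompt keeps the same {summaries} and {question} placeholders as the other templates, so it can be filled with str.format(), for example:

```python
from chatfuncs.prompts import instruction_prompt_gemma

# Illustrative values - the app supplies retrieved chunks as `summaries`
# and the user's query as `question`.
prompt = instruction_prompt_gemma.format(
    summaries="The deny list page explains how to upload a CSV of terms to redact...",
    question="How can I make a custom deny list?",
)
print(prompt)
```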
faiss_embedding/faiss_embedding.zip CHANGED
Binary files a/faiss_embedding/faiss_embedding.zip and b/faiss_embedding/faiss_embedding.zip differ
 
requirements.txt CHANGED
@@ -4,7 +4,7 @@ langchain-community==0.3.22
4
  beautifulsoup4==4.13.4
5
  google-generativeai==0.8.5
6
  pandas==2.2.3
7
- transformers==4.41.2
8
  # For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
9
  llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
10
  #-C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 
4
  beautifulsoup4==4.13.4
5
  google-generativeai==0.8.5
6
  pandas==2.2.3
7
+ transformers==4.51.3
8
  # For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
9
  llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
10
  #-C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
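The transformers bump from 4.41.2 to 4.51.3 is presumably what allows the new Gemma 3 1B base model to load. A minimal, illustrative sketch of loading and querying the checkpoint directly with transformers (the repo is gated, so a Hugging Face token or login may be required); this is not necessarily the app's exact load path:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "google/gemma-3-1b-it"

# Gated checkpoint: pass token=... or run `huggingface-cli login` first.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

messages = [{"role": "user", "content": "How can I make a custom deny list?"}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True,
                                          return_tensors="pt")
output_ids = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```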