allenlsl committed
Commit 7907d24 · verified · 1 Parent(s): f9e4ddb

Update main.py

Files changed (1)
  1. main.py +25 -0
main.py CHANGED
@@ -228,6 +228,31 @@ if __name__ == "__main__":
     print("\n🧠 LLaMA 3 Answer:")
     print(answer)
 
+
+def initialize_index(update_mode="none"):
+    global documents, vector_index
+
+    processed_pdfs = load_cache(PDF_CACHE_FILE)
+    processed_urls = load_cache(URL_CACHE_FILE)
+
+    if update_mode == "updateall":
+        processed_pdfs = set()
+        processed_urls = set()
+
+    index_loaded = load_index()
+
+    if update_mode == "updateall" or not index_loaded or update_mode == "update":
+        process_pdf_folder(processed_files=processed_pdfs)
+        for url in load_urls():
+            crawl_url(url, depth=1, processed_urls=processed_urls)
+        save_index()
+        save_cache(processed_pdfs, PDF_CACHE_FILE)
+        save_cache(processed_urls, URL_CACHE_FILE)
+    else:
+        print(f"✅ FAISS index with {vector_index.ntotal} vectors loaded.")
+        print(f"✅ Loaded {len(documents)} legal document chunks.")
+
+
 # This version includes all 3 enhancements:
 # - Smart chunking via smart_chunk.py
 # - High-quality embedding model (E5)
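For reference, a minimal sketch of how the new initialize_index() could be wired into main.py's entry point. It assumes a positional command-line argument selects the update mode; that argument handling is illustrative and is not part of this commit.

# Hypothetical wiring (not part of this commit): pick the update mode from
# the command line before building or loading the FAISS index.
import sys

if __name__ == "__main__":
    # "updateall" rebuilds caches and the index from scratch, "update" processes
    # only new PDFs/URLs, and any other value reuses a saved index if one exists.
    mode = sys.argv[1] if len(sys.argv) > 1 else "none"
    initialize_index(update_mode=mode)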