Update main.py
Browse files
main.py
CHANGED
@@ -228,6 +228,31 @@ if __name__ == "__main__":
|
|
228 |
print("\n🧠 LLaMA 3 Answer:")
|
229 |
print(answer)
|
230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
# This version includes all 3 enhancements:
|
232 |
# - Smart chunking via smart_chunk.py
|
233 |
# - High-quality embedding model (E5)
|
|
|
228 |
print("\n🧠 LLaMA 3 Answer:")
|
229 |
print(answer)
|
230 |
|
231 |
+
|
232 |
+
def initialize_index(update_mode="none"):
    """Build or load the FAISS vector index and the document chunk store.

    Parameters:
        update_mode: one of
            "none"      - load the saved index if one exists; only ingest
                          sources when no saved index can be loaded.
            "update"    - ingest sources not yet recorded in the PDF/URL
                          caches, then persist the refreshed index and caches.
            "updateall" - discard both progress caches so every PDF and URL
                          is re-ingested from scratch.

    Side effects: mutates the module-level ``documents`` and ``vector_index``
    (via the load/ingest helpers), and writes the index and cache files when
    a (re)build runs.
    """
    global documents, vector_index  # populated by load_index / the ingest helpers

    processed_pdfs = load_cache(PDF_CACHE_FILE)
    processed_urls = load_cache(URL_CACHE_FILE)

    # A full rebuild forgets prior progress so every source is processed again.
    if update_mode == "updateall":
        processed_pdfs = set()
        processed_urls = set()

    index_loaded = load_index()

    # Rebuild when explicitly requested, or when no saved index could be loaded.
    if update_mode in ("update", "updateall") or not index_loaded:
        process_pdf_folder(processed_files=processed_pdfs)
        for url in load_urls():
            # depth=1: crawl each seed page plus its direct links only.
            crawl_url(url, depth=1, processed_urls=processed_urls)
        save_index()
        save_cache(processed_pdfs, PDF_CACHE_FILE)
        save_cache(processed_urls, URL_CACHE_FILE)
    else:
        print(f"✅ FAISS index with {vector_index.ntotal} vectors loaded.")
        print(f"✅ Loaded {len(documents)} legal document chunks.")
|
254 |
+
|
255 |
+
|
256 |
# This version includes all 3 enhancements:
|
257 |
# - Smart chunking via smart_chunk.py
|
258 |
# - High-quality embedding model (E5)
|