TabasumDev committed · verified
Commit 6d59cf8 · 1 Parent(s): 461646b

Update app.py

Files changed (1): app.py +44 -59
app.py CHANGED
@@ -444,32 +444,27 @@
 # πŸ”₯ Run Streamlit App
 # if __name__ == '__main__':
 #     main()
-
 import streamlit as st
 import os
 import re
 import torch
+import pdfplumber
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType
 
-# βœ… Force CPU execution and disable bitsandbytes
-os.environ["BITSANDBYTES_NOWELCOME"] = "1"
-os.environ["USE_TORCH_CPP_BACKEND"] = "1"
+# βœ… Force CPU execution for Streamlit Cloud
+device = torch.device("cpu")
 
 # πŸ”Ή Load IBM Granite Model (CPU-Compatible)
 MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_NAME,
-        device_map="cpu",
-        torch_dtype=torch.float32
-    )
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-except Exception as e:
-    st.error(f"🚨 Model loading failed: {str(e)}")
-    st.stop()
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    device_map="cpu",          # Force CPU execution
+    torch_dtype=torch.float32  # Use float32 since Streamlit runs on CPU
+)
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
 # πŸ”Ή Apply LoRA Fine-Tuning Configuration
 lora_config = LoraConfig(
@@ -480,34 +475,23 @@ lora_config = LoraConfig(
     bias="none",
     task_type=TaskType.CAUSAL_LM
 )
+model = get_peft_model(model, lora_config)
+model.eval()
 
-try:
-    model = get_peft_model(model, lora_config)
-    model.eval()
-except Exception as e:
-    st.error(f"🚨 LoRA model initialization failed: {str(e)}")
-    st.stop()
-
-# πŸ›  Function to Read & Extract Text from PDFs
+# πŸ›  Function to Read & Extract Text from PDFs (Using pdfplumber)
 def read_files(file):
-    """Extracts text from uploaded PDF file."""
     file_context = ""
-    try:
-        reader = PdfReader(file)
+    with pdfplumber.open(file) as reader:
         for page in reader.pages:
             text = page.extract_text()
             if text:
                 file_context += text + "\n"
-    except Exception as e:
-        st.error(f"🚨 PDF reading failed: {str(e)}")
-        return ""
-
-    return file_context.strip() if file_context else "No readable text found in the document."
+    return file_context.strip()
 
 # πŸ›  Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
     if file_context:
-        system_msg += " The user has provided a contract document. Analyze it and extract key insights, but do not summarize it."
+        system_msg += " The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
     return [
         {"role": "system", "content": system_msg},
         {"role": "user", "content": user_msg}
@@ -515,36 +499,31 @@ def format_prompt(system_msg, user_msg, file_context=""):
 
 # πŸ›  Function to Generate AI Responses
 def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
-    """Generates response using IBM Granite."""
-    try:
-        model_inputs = tokenizer([input_text], return_tensors="pt").to("cpu")
-
-        with torch.no_grad():
-            output = model.generate(
-                **model_inputs,
-                max_new_tokens=max_tokens,
-                do_sample=True,
-                top_p=top_p,
-                temperature=temperature,
-                num_return_sequences=1,
-                pad_token_id=tokenizer.eos_token_id
-            )
-
-        return tokenizer.decode(output[0], skip_special_tokens=True)
+    model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
 
-    except Exception as e:
-        return f"🚨 Model inference error: {str(e)}"
+    with torch.no_grad():
+        output = model.generate(
+            **model_inputs,
+            max_new_tokens=max_tokens,
+            do_sample=True,
+            top_p=top_p,
+            temperature=temperature,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    return tokenizer.decode(output[0], skip_special_tokens=True)
 
 # πŸ›  Function to Clean AI Output
 def post_process(text):
     cleaned = re.sub(r'ζˆ₯+', '', text)  # Remove unwanted symbols
     lines = cleaned.splitlines()
     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
+
     return "\n".join(unique_lines)
 
 # πŸ›  Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
-    """Processes PDF and AI response."""
     file_context = read_files(file) if file else ""
 
     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
@@ -557,7 +536,7 @@ def granite_simple(prompt, file):
 
 # πŸ”Ή Streamlit UI
 def main():
-    st.set_page_config(page_title="Contract Analysis AI", page_icon="πŸ“œ")
+    st.set_page_config(page_title="Contract Analysis AI", page_icon="πŸ“œ", layout="wide")
 
     st.title("πŸ“œ AI-Powered Contract Analysis Tool")
     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
@@ -572,20 +551,27 @@ def main():
     # πŸ”Ή File Upload Section
    uploaded_file = st.file_uploader("πŸ“‚ Upload a contract document (PDF)", type="pdf")
 
-    if uploaded_file:
-        st.success("βœ… File uploaded successfully! Click below to analyze.")
+    if uploaded_file is not None:
+        temp_file_path = "temp_uploaded_contract.pdf"
+        with open(temp_file_path, "wb") as f:
+            f.write(uploaded_file.getbuffer())
+
+        st.success("βœ… File uploaded successfully!")
+
+        # πŸ”Ή User Input for Analysis
+        user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
 
         if st.button("πŸ” Analyze Document"):
             with st.spinner("Analyzing contract document... ⏳"):
-                final_answer = granite_simple(
-                    "Perform a detailed analysis of the contract, highlighting risks, legal pitfalls, compliance issues, and potential disputes.",
-                    uploaded_file
-                )
+                final_answer = granite_simple(user_prompt, temp_file_path)
 
                 # πŸ”Ή Display Analysis Result
                 st.subheader("πŸ“‘ Analysis Result")
                 st.write(final_answer)
 
+        # πŸ”Ή Remove Temporary File
+        os.remove(temp_file_path)
+
 # πŸ”₯ Run Streamlit App
 if __name__ == '__main__':
     main()
@@ -595,7 +581,6 @@ if __name__ == '__main__':
 
 
 
-
 # import streamlit as st
 # from PyPDF2 import PdfReader
 
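Note that the rewritten `read_files` drops the error guard the PyPDF2 version had: a corrupt or password-protected upload will now raise inside the Streamlit callback instead of showing a friendly error. A minimal sketch of how the guard could be restored around the pdfplumber call, mirroring the `st.error` / empty-string style of the removed code (not part of this commit):

```python
import pdfplumber
import streamlit as st

def read_files(file):
    """Extract text from a PDF, reporting failures in the UI instead of crashing."""
    file_context = ""
    try:
        with pdfplumber.open(file) as reader:
            for page in reader.pages:
                text = page.extract_text()
                if text:
                    file_context += text + "\n"
    except Exception as e:
        st.error(f"🚨 PDF reading failed: {str(e)}")
        return ""
    return file_context.strip()
```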
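`format_prompt` returns a chat-style list of role dicts, while `generate_response` expects a single string; the bridging step lives in `granite_simple`, whose body falls outside the hunks shown here. With recent `transformers` releases that step is typically the tokenizer's chat template, so a sketch under that assumption (using the names defined in app.py):

```python
# Assumed glue inside granite_simple: turn the message list into the
# prompt string that generate_response consumes.
messages = format_prompt(system_message, prompt, file_context)
input_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a prompt string, not token IDs
    add_generation_prompt=True,  # append the assistant-turn marker
)
response = generate_response(input_text)
final_answer = post_process(response)
```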
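One design choice worth flagging in the new upload flow: every upload is written to the fixed path `temp_uploaded_contract.pdf`, so two concurrent sessions on the same host would overwrite each other's file before `os.remove` runs. A sketch of a per-upload alternative using the standard-library `tempfile` module (hypothetical helper, not part of this commit):

```python
import tempfile

def save_upload(uploaded_file):
    """Write a Streamlit UploadedFile to a uniquely named temp file.

    Returns the path; the caller stays responsible for os.remove(),
    as main() already does for temp_file_path.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.getbuffer())  # getbuffer() yields the raw bytes
        return tmp.name  # unique path, safe across concurrent sessions
```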