Update app.py

app.py CHANGED
@@ -444,32 +444,27 @@
 # 🔥 Run Streamlit App
 # if __name__ == '__main__':
 #     main()
-
 import streamlit as st
 import os
 import re
 import torch
+import pdfplumber
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType
 
-# ✅ Force CPU execution
-
-os.environ["USE_TORCH_CPP_BACKEND"] = "1"
+# ✅ Force CPU execution for Streamlit Cloud
+device = torch.device("cpu")
 
 # 🔹 Load IBM Granite Model (CPU-Compatible)
 MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 
-
-
-
-
-
-
-
-except Exception as e:
-    st.error(f"🚨 Model loading failed: {str(e)}")
-    st.stop()
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    device_map="cpu",  # Force CPU execution
+    torch_dtype=torch.float32  # Use float32 since Streamlit runs on CPU
+)
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
 # 🔹 Apply LoRA Fine-Tuning Configuration
 lora_config = LoraConfig(
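Note on the new load path: passing device_map to from_pretrained relies on the accelerate package being installed alongside transformers. A minimal standalone smoke test of the same load path, under that assumption and separate from app.py, might look like:

# Hypothetical smoke test for the CPU load path above; not part of this commit.
# Assumes transformers and accelerate are installed (device_map needs accelerate).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, device_map="cpu", torch_dtype=torch.float32
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# One short generation confirms the float32 weights actually run on CPU.
inputs = tokenizer("Hello,", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=8)
print(tokenizer.decode(out[0], skip_special_tokens=True))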
@@ -480,34 +475,23 @@ lora_config = LoraConfig(
     bias="none",
     task_type=TaskType.CAUSAL_LM
 )
-
-    model = get_peft_model(model, lora_config)
-    model.eval()
-except Exception as e:
-    st.error(f"🚨 LoRA model initialization failed: {str(e)}")
-    st.stop()
-
-# 📂 Function to Read & Extract Text from PDFs
+model = get_peft_model(model, lora_config)
+model.eval()
+
+# 📂 Function to Read & Extract Text from PDFs (Using pdfplumber)
 def read_files(file):
-    """Extracts text from uploaded PDF file."""
     file_context = ""
-
-        reader = PdfReader(file)
+    with pdfplumber.open(file) as reader:
         for page in reader.pages:
             text = page.extract_text()
             if text:
                 file_context += text + "\n"
-
-        st.error(f"🚨 PDF reading failed: {str(e)}")
-        return ""
-
-    return file_context.strip() if file_context else "No readable text found in the document."
+    return file_context.strip()
 
 # 📝 Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
     if file_context:
-        system_msg += " The user has provided a contract document.
+        system_msg += " The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
     return [
         {"role": "system", "content": system_msg},
         {"role": "user", "content": user_msg}
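Worth noting: the rewritten read_files drops the old try/except, so a corrupt or unreadable PDF now raises instead of returning an empty string. If a caller wants the old behavior back, a guard at the call site is enough. A sketch, where "sample.pdf" is a hypothetical local path:

# Hypothetical caller-side guard for the new read_files defined above.
try:
    file_context = read_files("sample.pdf")  # hypothetical path
except Exception as e:  # e.g. pdfplumber/pdfminer parse errors
    file_context = ""
    print(f"PDF reading failed: {e}")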
@@ -515,36 +499,31 @@ def format_prompt(system_msg, user_msg, file_context=""):
 
 # 📝 Function to Generate AI Responses
 def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
-
-    try:
-        model_inputs = tokenizer([input_text], return_tensors="pt").to("cpu")
-
-        with torch.no_grad():
-            output = model.generate(
-                **model_inputs,
-                max_new_tokens=max_tokens,
-                do_sample=True,
-                top_p=top_p,
-                temperature=temperature,
-                num_return_sequences=1,
-                pad_token_id=tokenizer.eos_token_id
-            )
-
-        return tokenizer.decode(output[0], skip_special_tokens=True)
+    model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
+
+    with torch.no_grad():
+        output = model.generate(
+            **model_inputs,
+            max_new_tokens=max_tokens,
+            do_sample=True,
+            top_p=top_p,
+            temperature=temperature,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    return tokenizer.decode(output[0], skip_special_tokens=True)
 
-
-
 # 📝 Function to Clean AI Output
 def post_process(text):
     cleaned = re.sub(r'来+', '', text)  # Remove unwanted symbols
     lines = cleaned.splitlines()
     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
+
     return "\n".join(unique_lines)
 
 # 📑 Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
-    """Processes PDF and AI response."""
     file_context = read_files(file) if file else ""
 
     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
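The hunk above leaves a seam worth flagging: format_prompt returns a chat-style message list, while generate_response consumes a single string. The glue presumably lives in the unchanged part of granite_simple, outside these hunks. One plausible wiring, assuming the Granite tokenizer ships a chat template (transformers' apply_chat_template), would be:

# Hypothetical glue inside granite_simple; the real body is not shown in this diff.
messages = format_prompt(system_message, prompt, file_context)
input_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,  # render to a prompt string rather than token IDs
    add_generation_prompt=True  # append the assistant-turn marker
)
final_answer = post_process(generate_response(input_text))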
@@ -557,7 +536,7 @@ def granite_simple(prompt, file):
 
 # 🔹 Streamlit UI
 def main():
-    st.set_page_config(page_title="Contract Analysis AI", page_icon="📄")
+    st.set_page_config(page_title="Contract Analysis AI", page_icon="📄", layout="wide")
 
     st.title("📄 AI-Powered Contract Analysis Tool")
     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
@@ -572,20 +551,27 @@ def main():
     # 🔹 File Upload Section
     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
 
-    if uploaded_file:
-
+    if uploaded_file is not None:
+        temp_file_path = "temp_uploaded_contract.pdf"
+        with open(temp_file_path, "wb") as f:
+            f.write(uploaded_file.getbuffer())
+
+        st.success("✅ File uploaded successfully!")
+
+        # 🔹 User Input for Analysis
+        user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
 
         if st.button("🔍 Analyze Document"):
             with st.spinner("Analyzing contract document... ⏳"):
-                final_answer = granite_simple(
-                    "Perform a detailed analysis of the contract, highlighting risks, legal pitfalls, compliance issues, and potential disputes.",
-                    uploaded_file
-                )
+                final_answer = granite_simple(user_prompt, temp_file_path)
 
                 # 🔹 Display Analysis Result
                 st.subheader("📋 Analysis Result")
                 st.write(final_answer)
 
+        # 🔹 Remove Temporary File
+        os.remove(temp_file_path)
+
 # 🔥 Run Streamlit App
 if __name__ == '__main__':
     main()
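One design note on the upload flow above: every session writes to the same fixed temp_uploaded_contract.pdf, so two concurrent users could clobber each other's upload. A per-session temporary file via Python's tempfile module is a common alternative; a sketch of what that could look like, not what this commit does:

# Hypothetical per-session variant of the temp-file handling above.
import os
import tempfile

with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
    tmp.write(uploaded_file.getbuffer())  # uploaded_file from st.file_uploader
    temp_file_path = tmp.name

try:
    final_answer = granite_simple(user_prompt, temp_file_path)
finally:
    os.remove(temp_file_path)  # clean up even if the analysis raises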
@@ -595,7 +581,6 @@ if __name__ == '__main__':
 
 
 
-
 # import streamlit as st
 # from PyPDF2 import PdfReader
 