safiaa02 committed · verified
Commit 4d4ad35 · 1 Parent(s): 63b8d64

Update app.py

Files changed (1): app.py +3 -6
app.py CHANGED
@@ -6,7 +6,6 @@ import numpy as np
 import torch
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from peft import PeftModel
 from reportlab.lib.pagesizes import A4
 from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
@@ -56,21 +55,19 @@ def retrieve_milestone(user_input):
     return descriptions[indices[0][0]] if indices[0][0] < len(descriptions) else "No relevant milestone found."

 # Initialize IBM Granite Model
-BASE_NAME = "ibm-granite/granite-3.0-8b-instruct"
-LORA_NAME = "ibm-granite/granite-rag-3.0-8b-lora"
+BASE_NAME = "ibm-granite/granite-3.0-2b-base"

 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

 tokenizer = AutoTokenizer.from_pretrained(BASE_NAME, padding_side='left', trust_remote_code=True)
 model_base = AutoModelForCausalLM.from_pretrained(BASE_NAME, device_map="auto")
-model_rag = PeftModel.from_pretrained(model_base, LORA_NAME)

 def generate_response(user_input, child_age):
     relevant_milestone = retrieve_milestone(user_input)
     question_chat = [
         {
             "role": "system",
-            "content": "{\"instruction\": \"Respond to the user's latest question based solely on the information provided in the documents. Ensure that your response is strictly aligned with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data. Make sure that your response follows the attributes mentioned in the 'meta' field.\", \"documents\": [{\"doc_id\": 1, \"text\": \"The child is {child_age} months old. Based on the given traits: {user_input}, determine whether the child is meeting expected milestones. Relevant milestone: {relevant_milestone}. If there are any concerns, suggest steps the parents can take.\"}], \"meta\": {\"hallucination_tags\": true, \"citations\": true}}"
+            "content": f"The child is {child_age} months old. Based on the given traits: {user_input}, determine whether the child is meeting expected milestones. Relevant milestone: {relevant_milestone}. If there are any concerns, suggest steps the parents can take."
         },
         {
             "role": "user",
@@ -79,7 +76,7 @@ def generate_response(user_input, child_age):
     ]
     input_text = tokenizer.apply_chat_template(question_chat, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(input_text, return_tensors="pt")
-    output = model_rag.generate(inputs["input_ids"].to(device), attention_mask=inputs["attention_mask"].to(device), max_new_tokens=500)
+    output = model_base.generate(inputs["input_ids"].to(device), attention_mask=inputs["attention_mask"].to(device), max_new_tokens=500)
     output_text = tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
     return output_text
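
The diff only shows the final line of retrieve_milestone, so for orientation here is a minimal sketch of how such a retrieval step is commonly wired with SentenceTransformer plus a FAISS index. The embedding model name, the index type, and the sample milestone descriptions are assumptions for illustration, not code from this commit.

# Hedged sketch of the retrieval step; the embedder, FAISS index, and sample
# milestone texts are assumptions. Only the final return line comes from app.py.
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model

descriptions = [
    "Walks independently by around 15 months.",
    "Uses simple two-word phrases by around 24 months.",
]  # placeholder milestone descriptions

# Build an exact L2 nearest-neighbour index over the milestone embeddings.
embeddings = embedder.encode(descriptions).astype(np.float32)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

def retrieve_milestone(user_input):
    # Embed the query and look up the single closest milestone description.
    query = embedder.encode([user_input]).astype(np.float32)
    _, indices = index.search(query, 1)
    return descriptions[indices[0][0]] if indices[0][0] < len(descriptions) else "No relevant milestone found."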
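
On the new code path, generate_response builds a single system message, runs it through the tokenizer's chat template, and generates with model_base directly. A minimal usage sketch follows; the traits and age are invented inputs.

# Example call into the updated function; the inputs are made up.
answer = generate_response(
    user_input="not yet babbling, limited eye contact, sits without support",
    child_age=9,
)
print(answer)

The slice output[0][inputs["input_ids"].shape[1]:] in generate_response drops the prompt tokens before decoding, so only the newly generated advice is returned.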
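
The reportlab imports kept by this commit (A4, SimpleDocTemplate, Paragraph, Spacer, getSampleStyleSheet) point to a PDF export step elsewhere in app.py. A minimal sketch of that pattern is below; the helper name, report title, and output filename are assumptions.

# Hedged sketch of a PDF export using the reportlab imports shown in the diff;
# the function name, title, and file path are assumptions.
from reportlab.lib.pagesizes import A4
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
from reportlab.lib.styles import getSampleStyleSheet

def export_report(report_text, path="milestone_report.pdf"):
    styles = getSampleStyleSheet()
    doc = SimpleDocTemplate(path, pagesize=A4)
    story = [
        Paragraph("Developmental Milestone Report", styles["Title"]),
        Spacer(1, 12),
        Paragraph(report_text, styles["Normal"]),
    ]
    doc.build(story)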