ritvik77 committed on
Commit fd265eb · verified · 1 Parent(s): 66fe6a9

Update README.md

Files changed (1)
  1. README.md +110 -7
README.md CHANGED
@@ -5,8 +5,8 @@ model_name: Doctor_AI_LoRA-Mistral-7B-Instructritvik77
  tags:
  - generated_from_trainer
  - trl
- - sft
  licence: license
+ license: apache-2.0
  ---

  # Model Card for Doctor_AI_LoRA-Mistral-7B-Instructritvik77
@@ -17,13 +17,116 @@ It has been trained using [TRL](https://github.com/huggingface/trl).
  ## Quick start

  ```python
- from transformers import pipeline

- question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
- generator = pipeline("text-generation", model="ritvik77/Doctor_AI_LoRA-Mistral-7B-Instructritvik77", device="cuda")
- output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
- print(output["generated_text"])
+ # from peft import PeftModel, PeftConfig
+ # from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ # from datasets import load_dataset
+ # import torch
+
+ # # Quantization config for 4-bit loading
+ # bnb_config = BitsAndBytesConfig(
+ #     load_in_4bit=True,
+ #     bnb_4bit_quant_type="nf4",
+ #     bnb_4bit_compute_dtype=torch.bfloat16,
+ #     bnb_4bit_use_double_quant=True,
+ # )
+
+ # # Repo ID for the PEFT model
+ # peft_model_id = f"{username}/{output_dir}"  # e.g., ritvik77/Mixtral-7B-LoRA-Salesforce-Optimized-AI-AgentCall
+ # device = "auto"
+
+ # # Load PEFT config from the Hub
+ # config = PeftConfig.from_pretrained(peft_model_id)
+
+ # # Load the base model (e.g., Mistral-7B) with quantization
+ # model = AutoModelForCausalLM.from_pretrained(
+ #     config.base_model_name_or_path,  # Base model ID stored in PEFT config
+ #     device_map="auto",
+ #     quantization_config=bnb_config,  # Apply 4-bit quantization
+ # )
+
+ # # Load tokenizer from the PEFT model repo
+ # tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
+
+ # # Resize token embeddings to match tokenizer (if needed)
+ # model.resize_token_embeddings(len(tokenizer))
+
+ # # Load PEFT adapters and apply them to the base model
+ # model = PeftModel.from_pretrained(model, peft_model_id)
+
+ # # Convert model to bfloat16 and set to evaluation mode
+ # model.to(torch.bfloat16)
+ # model.eval()
+
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ from peft import PeftModel, PeftConfig
+
+ # ✅ Quantization config for 4-bit loading (memory optimization)
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",               # ✅ Improved precision for LoRA weights
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_use_double_quant=True,          # ✅ Reduces VRAM overhead
+ )
+
+ # ✅ Load the tokenizer from the fine-tuned checkpoint (ensures token consistency)
+ peft_model_id = "ritvik77/Doctor_AI_LoRA-Mistral-7B-Instructritvik77"
+ tokenizer = AutoTokenizer.from_pretrained(peft_model_id, trust_remote_code=True)
+
+ # ✅ Ensure `pad_token` is correctly assigned
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token
+
+ # ✅ Load the base model with quantization for memory efficiency
+ model_name = "mistralai/Mistral-7B-Instruct-v0.3"
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",                       # ✅ Maps layers to the available GPUs
+     quantization_config=bnb_config,          # ✅ Efficient quantization for large models
+     torch_dtype=torch.bfloat16,
+ )
+
+ # ✅ Resize token embeddings BEFORE loading the LoRA adapter (prevents a size mismatch)
+ model.resize_token_embeddings(len(tokenizer))
+
+ # ✅ Load the PEFT adapter (LoRA weights)
+ model = PeftModel.from_pretrained(model, peft_model_id)
+
+ # ✅ Unfreeze the LoRA layers so they stay trainable (only needed for further fine-tuning)
+ for name, param in model.named_parameters():
+     if "lora" in name:
+         param.requires_grad = True
+
+ # ✅ Confirm the LoRA layers are active
+ if hasattr(model, "print_trainable_parameters"):
+     model.print_trainable_parameters()
+ else:
+     print("❗ Warning: LoRA adapter may not have loaded correctly.")
+
+ # ✅ Put the model in evaluation mode for inference
+ model.eval()
+
+ # ✅ Sample inference code
+ def generate_response(prompt, max_new_tokens=300, temperature=0.7):
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=max_new_tokens,
+         do_sample=True,
+         temperature=temperature,
+     )
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # ✅ Sample prompt for medical diagnosis
+ prompt = "Patient reports chest pain and shortness of breath. What might be the diagnosis?"
+ response = generate_response(prompt)
+ print("\n🩺 Diagnosis:", response)
+
+ print("🚀 PEFT model loaded successfully with resized embeddings!")
  ```

  ## Training procedure