erynn-1-774m / inference_final.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Paths to model and adapter
MODEL_PATH = r"C:\Users\j\Desktop\Erynn\gpt2-large"
ADAPTER_PATH = r"C:\Users\j\Desktop\Erynn\erynn_adapter"
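
# Note: the paths above are machine-specific local copies. If you prefer to
# pull the base model from the Hugging Face Hub instead, the standard model
# id should also work here (assumes network access):
# MODEL_PATH = "gpt2-large"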

def load_model():
    """Load the base model and tokenizer, then attach the LoRA adapter."""
    # Load the base model in half precision to keep memory usage low
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="auto",
        torch_dtype=torch.float16
    )
    # Attach the fine-tuned LoRA adapter on top of the base weights
    model = PeftModel.from_pretrained(model, ADAPTER_PATH)
    # GPT-2 has no dedicated pad token, so reuse the EOS token for padding
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer
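
# Optional alternative loader (a sketch, not used by main() below): PEFT can
# fold the LoRA weights back into the base model with merge_and_unload(),
# which removes the adapter indirection at inference time.
def load_merged_model():
    """Load the model with the LoRA weights merged into the base weights."""
    model, tokenizer = load_model()
    model = model.merge_and_unload()  # returns a plain transformers model
    return model, tokenizer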

def get_response(model, tokenizer, instruction, context=None):
    """
    Generate a response for the given instruction and optional context.
    Example: get_response(model, tokenizer, "Write an ad for a phone")
    """
    # Build a simple instruction-style prompt
    prompt = f"Instruction: {instruction}\n"
    if context and context.strip():
        prompt += f"Context: {context}\n"
    prompt += "Response: "
    # Tokenize and move tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Generate without tracking gradients
    with torch.no_grad():
        output = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=100,  # keep responses short and focused
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            do_sample=True,  # required for temperature/top_p to take effect
            pad_token_id=tokenizer.eos_token_id
        )
    # Decode and return only the text after the "Response: " marker
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    marker = "Response: "
    idx = response.find(marker)
    return response[idx + len(marker):].strip() if idx != -1 else response.strip()
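
# Because generation samples tokens (do_sample=True), outputs vary from run
# to run. For repeatable results, seed PyTorch's RNG before generating, e.g.:
#
#     torch.manual_seed(42)
#     print(get_response(model, tokenizer, "Explain AI briefly."))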

def main():
    """Run example instructions to test the model."""
    print("Erynn is ready! Testing some examples...\n")
    # Load the model and tokenizer once, then reuse them for all tests
    model, tokenizer = load_model()
    # Test 1: Short explanation
    print("Test 1: Explain AI briefly")
    response = get_response(model, tokenizer, "Explain artificial intelligence in 50 words or less.")
    print(response, "\n" + "-" * 40)
    # Test 2: Summarization with context
    print("\nTest 2: Summarize this text")
    context = "Deep learning is a key AI technology. It excels in computer vision and natural language processing, driving advances in image recognition and speech synthesis."
    response = get_response(model, tokenizer, "Summarize this text in 30 words or less.", context)
    print(response, "\n" + "-" * 40)
    # Test 3: Advertisement
    print("\nTest 3: Write a smartwatch ad")
    response = get_response(model, tokenizer, "Write a short advertisement for a smartwatch in 40 words.")
    print(response, "\n" + "-" * 40)
    # Test 4: List
    print("\nTest 4: List Python advantages")
    response = get_response(model, tokenizer, "List three advantages of Python programming.")
    print(response)
    print("\nTry your own instruction: get_response(model, tokenizer, 'Your instruction here')")

if __name__ == "__main__":
    main()