erynn-1-774m / inference_final.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Paths to model and adapter
MODEL_PATH = r"C:\Users\j\Desktop\Erynn\gpt2-large"
ADAPTER_PATH = r"C:\Users\j\Desktop\Erynn\erynn_adapter"
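
# Note: the paths above are machine-specific local copies. If you prefer to
# pull the base model from the Hugging Face Hub instead, the standard model
# id should also work here (assumes network access):
# MODEL_PATH = "gpt2-large"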

def load_model():
    """Load the base model and tokenizer, then attach the LoRA adapter."""
    # Load the base model in half precision to keep memory usage low
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="auto",
        torch_dtype=torch.float16
    )
    # Attach the fine-tuned LoRA adapter on top of the base weights
    model = PeftModel.from_pretrained(model, ADAPTER_PATH)
    # GPT-2 has no dedicated pad token, so reuse the EOS token for padding
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer
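
# Optional alternative loader (a sketch, not used by main() below): PEFT can
# fold the LoRA weights back into the base model with merge_and_unload(),
# which removes the adapter indirection at inference time.
def load_merged_model():
    """Load the model with the LoRA weights merged into the base weights."""
    model, tokenizer = load_model()
    model = model.merge_and_unload()  # returns a plain transformers model
    return model, tokenizer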

def get_response(model, tokenizer, instruction, context=None):
    """
    Generate a response for the given instruction and optional context.
    Example: get_response(model, tokenizer, "Write an ad for a phone")
    """
    # Build a simple instruction-style prompt
    prompt = f"Instruction: {instruction}\n"
    if context and context.strip():
        prompt += f"Context: {context}\n"
    prompt += "Response: "
    # Tokenize and move tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Generate without tracking gradients
    with torch.no_grad():
        output = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=100,  # keep responses short and focused
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            do_sample=True,  # required for temperature/top_p to take effect
            pad_token_id=tokenizer.eos_token_id
        )
    # Decode and return only the text after the "Response: " marker
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    marker = "Response: "
    idx = response.find(marker)
    return response[idx + len(marker):].strip() if idx != -1 else response.strip()
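
# Because generation samples tokens (do_sample=True), outputs vary from run
# to run. For repeatable results, seed PyTorch's RNG before generating, e.g.:
#
#     torch.manual_seed(42)
#     print(get_response(model, tokenizer, "Explain AI briefly."))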

def main():
    """Run example instructions to test the model."""
    print("Erynn is ready! Testing some examples...\n")
    # Load the model and tokenizer once, then reuse them for all tests
    model, tokenizer = load_model()
    # Test 1: Short explanation
    print("Test 1: Explain AI briefly")
    response = get_response(model, tokenizer, "Explain artificial intelligence in 50 words or less.")
    print(response, "\n" + "-" * 40)
    # Test 2: Summarization with context
    print("\nTest 2: Summarize this text")
    context = "Deep learning is a key AI technology. It excels in computer vision and natural language processing, driving advances in image recognition and speech synthesis."
    response = get_response(model, tokenizer, "Summarize this text in 30 words or less.", context)
    print(response, "\n" + "-" * 40)
    # Test 3: Advertisement
    print("\nTest 3: Write a smartwatch ad")
    response = get_response(model, tokenizer, "Write a short advertisement for a smartwatch in 40 words.")
    print(response, "\n" + "-" * 40)
    # Test 4: List
    print("\nTest 4: List Python advantages")
    response = get_response(model, tokenizer, "List three advantages of Python programming.")
    print(response)
    print("\nTry your own instruction: get_response(model, tokenizer, 'Your instruction here')")

if __name__ == "__main__":
    main()