import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repo id (or a local directory) of the checkpoint to load.
# An empty string cannot be resolved by from_pretrained(), so default to the
# model this snippet documents; override it to point at a local copy.
model_path = "MrBinit/Llama3.2B-Nepali-Language-Model"

# Load the tokenizer and set the padding token to the eos_token.
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

# Load the weights in half precision. device_map="auto" already places the
# model on the available device(s), so no additional .to("cuda") is applied:
# moving a model that was dispatched this way is redundant on a single GPU
# and can fail when the weights are sharded or offloaded.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
)
def generate_response(user_input):
    """Generate the chatbot's reply to a single user message.

    Builds a two-message conversation (fixed system instruction plus the
    user's text), renders it with the tokenizer's chat template, runs
    generation with the module-level ``model`` and returns only the newly
    generated text.

    Args:
        user_input: The user's message as a string.

    Returns:
        The decoded assistant reply with special tokens removed and
        surrounding whitespace stripped.
    """
    instruction = """You are chatbot proficient in Nepalese Language."""
    messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_input},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # The rendered template already contains the special tokens, so do not
    # let the tokenizer add them a second time. Send the tensors to wherever
    # the model actually lives instead of assuming "cuda".
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        add_special_tokens=False,
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
    )
    # generate() returns prompt + continuation; drop the prompt tokens so the
    # reply is isolated by position rather than by searching the decoded text
    # for the word "assistant" (which may also occur in the prompt or reply).
    prompt_length = inputs["input_ids"].shape[-1]
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
# Demo: send one Nepali-language question through the chatbot and print
# the reply.
user_query = "राणा शासनले नेपाल कसरी कब्जा गर्यो भनेर व्याख्या गर्न सक्नुहुन्छ?"
response = generate_response(user_input=user_query)
print("Chatbot:", response)
- Downloads last month: 4
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support
Model tree for MrBinit/Llama3.2B-Nepali-Language-Model
Base model: meta-llama/Llama-3.2-3B-Instruct