hw3 Collection: My models from homework 3
This is a fine-tuned version of OuteAI/Lite-Oute-1-300M-Instruct for text sentiment classification into three categories: positive, negative, or neutral. Fine-tuning was performed using LoRA. The snippet below shows how to load the adapter weights and evaluate the model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download

REPO_NAME = "dzhuj/llm-course-hw3"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(f"{REPO_NAME}-lora", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(f"{REPO_NAME}-lora")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Apply LoRA to the model (apply_peft_to_module and LinearWithLoRA come from the homework code; see the sketch below)
apply_peft_to_module(model, LinearWithLoRA, r=8, alpha=16, target_submodules=["k_proj", "v_proj"])
model.to(DEVICE)

# Download the trained LoRA weights and load them on top of the base model
path = hf_hub_download(f"{REPO_NAME}-lora", "model.safetensors")
state_dict = load_file(path)
model.load_state_dict(state_dict, strict=False)

# eval() is the testing protocol defined further down in this card
lora_f1 = eval(model, dataset["test"], tokenizer)
print(f"Macro F1 after LoRA training: {lora_f1:.2f}")
The following dataset was used: cardiffnlp/tweet_eval
Example:
text: @user Alciato: Bee will invest 150 million in January, another 200 in the Summer and plans to bring Messi by 2017"
label: 2
str_label: positive
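A minimal sketch for reproducing this preprocessing, assuming the sentiment configuration of tweet_eval and the label-to-name mapping implied by the example above (IDX2NAME is the mapping used by the evaluation code further down):

from datasets import load_dataset

# tweet_eval sentiment labels: 0 = negative, 1 = neutral, 2 = positive
IDX2NAME = {0: "negative", 1: "neutral", 2: "positive"}

dataset = load_dataset("cardiffnlp/tweet_eval", "sentiment")
dataset = dataset.map(lambda example: {"str_label": IDX2NAME[example["label"]]})
print(dataset["test"][0]["text"], dataset["test"][0]["str_label"])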
Training hyperparameters:
lora_r = 8
lora_alpha = 16
target_submodules = ["k_proj", "v_proj"]
batch_size = 16
lr = 5e-4
num_epochs = 1

The following testing protocol was used:
import torch
from tqdm import tqdm
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, f1_score

# pad (a batch-first padding helper), IDX2NAME, postprocess_sentiment and DEVICE
# are defined elsewhere in the homework code.

@torch.inference_mode()
def eval(model, dataset, tokenizer, show_conf_m=True, batch_size=100):
    """Evaluates the given model on the provided dataset.

    Parameters:
        model: The language model used for generating sentiment predictions.
        dataset: An iterable collection of examples, where each example is a dict with keys:
            - "input_ids": The tokenized input message.
            - "str_label": The ground-truth sentiment label ("positive", "negative" or "neutral").
        tokenizer: The tokenizer used for padding and decoding the generated text.
        show_conf_m: Whether to plot a confusion matrix.
        batch_size: Number of examples generated per batch.

    Returns:
        float: The macro F1 score.
    """
    name2idx = {v: k for k, v in IDX2NAME.items()}
    name2idx[""] = len(name2idx)  # extra class for generations that contain no recognizable label
    ground_truth = []
    predicted = []
    for examples in tqdm(dataset.batch(batch_size)):
        input_ids = pad(list(map(torch.tensor, examples["input_ids"])), padding_value=tokenizer.pad_token_id).to(DEVICE)
        attention_mask = pad(list(map(lambda it: torch.ones(len(it)), examples["input_ids"])), padding_value=0).to(
            DEVICE
        )
        output_ids = model.generate(input_ids, attention_mask=attention_mask, max_new_tokens=16)
        # Keep only the newly generated tokens and map them back to a sentiment label
        shrinked_ids = output_ids[:, input_ids.shape[1]:]
        texts = tokenizer.batch_decode(shrinked_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for i in range(len(examples["str_label"])):
            predicted_sentiment = postprocess_sentiment(texts[i])
            ground_truth.append(name2idx[examples["str_label"][i]])
            predicted.append(name2idx[predicted_sentiment])
    if show_conf_m:
        conf_m = confusion_matrix(ground_truth, predicted, labels=list(name2idx.values()))
        disp = ConfusionMatrixDisplay(conf_m, display_labels=list(name2idx.keys()))
        disp.plot()
    f1 = f1_score(ground_truth, predicted, labels=list(name2idx.values()), average="macro", zero_division=0.0)
    return f1
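For completeness, here is a hypothetical way to prepare the test split and run this protocol. The exact prompt used to build "input_ids" in the homework is not shown in this card, so the template below is only an assumption:

def to_input_ids(example):
    # Hypothetical prompt template; the homework's actual prompt may differ.
    prompt = (
        "Classify the sentiment of the following tweet as positive, negative or neutral.\n"
        f"Tweet: {example['text']}\n"
        "Sentiment:"
    )
    return {"input_ids": tokenizer(prompt)["input_ids"]}

test_ds = dataset["test"].map(to_input_ids)
f1 = eval(model, test_ds, tokenizer)
print(f"Macro F1: {f1:.2f}")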
Base model: OuteAI/Lite-Oute-1-300M-Instruct