[INFO|2025-04-21 17:36:49] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/config.json
[INFO|2025-04-21 17:36:49] configuration_utils.py:771 >> Model config LlavaNextConfig {
  "_name_or_path": "llava-hf/llava-v1.6-mistral-7b-hf",
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava_next",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
    "architectures": [
      "MistralForCausalLM"
    ],
    "intermediate_size": 14336,
    "max_position_embeddings": 32768,
    "model_type": "mistral",
    "num_key_value_heads": 8,
    "rms_norm_eps": 1e-05,
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "torch_dtype": "bfloat16",
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.49.0",
  "use_image_newline_parameter": true,
  "vision_config": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}
[INFO|2025-04-21 17:36:49] tokenization_utils_base.py:2050 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/tokenizer.model
[INFO|2025-04-21 17:36:49] tokenization_utils_base.py:2050 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/tokenizer.json
[INFO|2025-04-21 17:36:49] tokenization_utils_base.py:2050 >> loading file added_tokens.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/added_tokens.json
[INFO|2025-04-21 17:36:49] tokenization_utils_base.py:2050 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/special_tokens_map.json
[INFO|2025-04-21 17:36:49] tokenization_utils_base.py:2050 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/tokenizer_config.json
[INFO|2025-04-21 17:36:49] tokenization_utils_base.py:2050 >> loading file chat_template.jinja from cache at None
[INFO|2025-04-21 17:36:49] tokenization_utils_base.py:2313 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
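The two entry groups above pull the LlavaNextConfig and the tokenizer files from the local Hugging Face cache. A minimal sketch of reproducing that step with the public transformers API; only the model ID is taken from the log, the rest is stock usage:

```python
# Hedged sketch: reproduce the config/tokenizer loading shown in the log.
from transformers import AutoConfig, AutoTokenizer

model_id = "llava-hf/llava-v1.6-mistral-7b-hf"  # from the log

config = AutoConfig.from_pretrained(model_id)        # resolves to LlavaNextConfig
tokenizer = AutoTokenizer.from_pretrained(model_id)  # resolves to LlamaTokenizerFast

print(config.model_type)         # "llava_next"
print(config.image_token_index)  # 32000, the image placeholder token id
print(len(tokenizer))            # base vocab plus the added special tokens
```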
[INFO|2025-04-21 17:36:50] processing_utils.py:816 >> loading configuration file processor_config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/processor_config.json
[INFO|2025-04-21 17:36:50] image_processing_base.py:381 >> loading configuration file preprocessor_config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/preprocessor_config.json
[WARNING|2025-04-21 17:36:50] logging.py:329 >> Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
[INFO|2025-04-21 17:36:50] image_processing_base.py:434 >> Image processor LlavaNextImageProcessor {
  "aspect_ratio_setting": "anyres",
  "crop_size": {
    "height": 336,
    "width": 336
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_pad": true,
  "do_rescale": true,
  "do_resize": true,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_mean": [0.48145466, 0.4578275, 0.40821073],
  "image_processor_type": "LlavaNextImageProcessor",
  "image_std": [0.26862954, 0.26130258, 0.27577711],
  "processor_class": "LlavaNextProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 336
  }
}
[INFO|2025-04-21 17:36:50] tokenization_utils_base.py:2050 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/tokenizer.model
[INFO|2025-04-21 17:36:50] tokenization_utils_base.py:2050 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/tokenizer.json
[INFO|2025-04-21 17:36:50] tokenization_utils_base.py:2050 >> loading file added_tokens.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/added_tokens.json
[INFO|2025-04-21 17:36:50] tokenization_utils_base.py:2050 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/special_tokens_map.json
[INFO|2025-04-21 17:36:50] tokenization_utils_base.py:2050 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/tokenizer_config.json
[INFO|2025-04-21 17:36:50] tokenization_utils_base.py:2050 >> loading file chat_template.jinja from cache at None
[INFO|2025-04-21 17:36:50] tokenization_utils_base.py:2313 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
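The WARNING above is informational: the checkpoint was saved with a slow (PIL-based) image processor, and transformers is announcing that `use_fast=True` will become the default. Passing the flag explicitly pins the behavior either way; a sketch, assuming reproducibility with the saved processor matters more than speed:

```python
# Hedged sketch: pin the image-processor implementation to silence the warning.
from transformers import AutoImageProcessor

model_id = "llava-hf/llava-v1.6-mistral-7b-hf"

# use_fast=False keeps the slow processor the checkpoint was saved with;
# use_fast=True opts into the fast variant (minor numerical differences).
image_processor = AutoImageProcessor.from_pretrained(model_id, use_fast=False)
print(type(image_processor).__name__)  # LlavaNextImageProcessor
```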
[INFO|2025-04-21 17:36:50] processing_utils.py:816 >> loading configuration file processor_config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/processor_config.json
[INFO|2025-04-21 17:36:51] processing_utils.py:876 >> Processor LlavaNextProcessor:
- image_processor: LlavaNextImageProcessor {
  "aspect_ratio_setting": "anyres",
  "crop_size": {
    "height": 336,
    "width": 336
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_pad": true,
  "do_rescale": true,
  "do_resize": true,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_mean": [0.48145466, 0.4578275, 0.40821073],
  "image_processor_type": "LlavaNextImageProcessor",
  "image_std": [0.26862954, 0.26130258, 0.27577711],
  "processor_class": "LlavaNextProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 336
  }
}
- tokenizer: LlamaTokenizerFast(name_or_path='llava-hf/llava-v1.6-mistral-7b-hf', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'image_token': '<image>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
  0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
  1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
  2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
  32000: AddedToken("<image>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
  32001: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)
{
  "image_token": "<image>",
  "num_additional_image_tokens": 1,
  "patch_size": 14,
  "processor_class": "LlavaNextProcessor",
  "vision_feature_select_strategy": "default"
}
[INFO|2025-04-21 17:36:51] logging.py:157 >> Loading dataset MattCoddity/dockerNLcommands...
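The processor bundles the image processor and tokenizer dumped above, and the run then loads the MattCoddity/dockerNLcommands dataset. A sketch of the same two steps; the dataset's column layout is not shown in the log, so inspect an example before writing any mapping code:

```python
# Hedged sketch: load the processor and the dataset named in the log.
from datasets import load_dataset
from transformers import LlavaNextProcessor

processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
dataset = load_dataset("MattCoddity/dockerNLcommands")

print(dataset)              # split names and row counts
print(dataset["train"][0])  # one raw example; column names vary by dataset
```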
[INFO|2025-04-21 17:36:53] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/config.json
[INFO|2025-04-21 17:36:53] configuration_utils.py:771 >> Model config LlavaNextConfig {
  "_name_or_path": "llava-hf/llava-v1.6-mistral-7b-hf",
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava_next",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
    "architectures": [
      "MistralForCausalLM"
    ],
    "intermediate_size": 14336,
    "max_position_embeddings": 32768,
    "model_type": "mistral",
    "num_key_value_heads": 8,
    "rms_norm_eps": 1e-05,
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "torch_dtype": "bfloat16",
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.49.0",
  "use_image_newline_parameter": true,
  "vision_config": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}
[INFO|2025-04-21 17:36:53] logging.py:157 >> Quantizing model to 4 bit with bitsandbytes.
[INFO|2025-04-21 17:36:53] modeling_utils.py:3982 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/model.safetensors.index.json
[INFO|2025-04-21 17:36:53] modeling_utils.py:1633 >> Instantiating LlavaNextForConditionalGeneration model under default dtype torch.float16.
[INFO|2025-04-21 17:36:53] configuration_utils.py:1140 >> Generate config GenerationConfig {}
[INFO|2025-04-21 17:36:54] modeling_utils.py:1633 >> Instantiating CLIPVisionModel model under default dtype torch.float16.
[INFO|2025-04-21 17:36:54] modeling_utils.py:1633 >> Instantiating MistralForCausalLM model under default dtype torch.float16.
[INFO|2025-04-21 17:36:54] configuration_utils.py:1140 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2
}
[INFO|2025-04-21 17:37:57] modeling_utils.py:4970 >> All model checkpoint weights were used when initializing LlavaNextForConditionalGeneration.
[INFO|2025-04-21 17:37:57] modeling_utils.py:4978 >> All the weights of LlavaNextForConditionalGeneration were initialized from the model checkpoint at llava-hf/llava-v1.6-mistral-7b-hf. If your task is similar to the task the model of the checkpoint was trained on, you can already use LlavaNextForConditionalGeneration for predictions without further training.
[INFO|2025-04-21 17:37:58] configuration_utils.py:1095 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/generation_config.json
[INFO|2025-04-21 17:37:58] configuration_utils.py:1140 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2
}
[INFO|2025-04-21 17:37:58] logging.py:157 >> Gradient checkpointing enabled.
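The log reports 4-bit bitsandbytes quantization, a float16 load, and gradient checkpointing, but not the exact quantization settings. A minimal sketch assuming the common QLoRA-style defaults; treat nf4, double quantization, and the compute dtype as assumptions:

```python
# Hedged sketch of the 4-bit model load. nf4 / double-quant / fp16 compute are
# assumptions -- the log only says "Quantizing model to 4 bit with bitsandbytes."
import torch
from transformers import BitsAndBytesConfig, LlavaNextForConditionalGeneration

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,  # matches torch_dtype float16 in the log
)

model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-mistral-7b-hf",
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
)
model.gradient_checkpointing_enable()  # "Gradient checkpointing enabled."
```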
[INFO|2025-04-21 17:37:58] logging.py:157 >> Casting multimodal projector outputs in torch.float16.
[INFO|2025-04-21 17:37:58] logging.py:157 >> Using torch SDPA for faster training and inference.
[INFO|2025-04-21 17:37:58] logging.py:157 >> Upcasting trainable params to float32.
[INFO|2025-04-21 17:37:58] logging.py:157 >> Fine-tuning method: LoRA
[INFO|2025-04-21 17:37:58] logging.py:157 >> Found linear modules: q_proj,v_proj,k_proj,gate_proj,up_proj,o_proj,down_proj
[INFO|2025-04-21 17:37:58] logging.py:157 >> Set vision model not trainable: ['vision_tower'].
[INFO|2025-04-21 17:37:58] logging.py:157 >> Set multi model projector not trainable: multi_modal_projector.
[INFO|2025-04-21 17:37:58] logging.py:157 >> trainable params: 20,971,520 || all params: 7,587,719,168 || trainable%: 0.2764
[INFO|2025-04-21 17:37:58] trainer.py:746 >> Using auto half precision backend
[WARNING|2025-04-21 17:37:58] trainer.py:781 >> No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[INFO|2025-04-21 17:37:59] trainer.py:2405 >> ***** Running training *****
[INFO|2025-04-21 17:37:59] trainer.py:2406 >> Num examples = 2,415
[INFO|2025-04-21 17:37:59] trainer.py:2407 >> Num Epochs = 3
[INFO|2025-04-21 17:37:59] trainer.py:2408 >> Instantaneous batch size per device = 2
[INFO|2025-04-21 17:37:59] trainer.py:2411 >> Total train batch size (w. parallel, distributed & accumulation) = 16
[INFO|2025-04-21 17:37:59] trainer.py:2412 >> Gradient Accumulation steps = 8
[INFO|2025-04-21 17:37:59] trainer.py:2413 >> Total optimization steps = 453
[INFO|2025-04-21 17:37:59] trainer.py:2414 >> Number of trainable parameters = 20,971,520
[WARNING|2025-04-21 17:38:00] logging.py:329 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
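Two things in this block are worth unpacking. First, the batch arithmetic: 2 examples per device with 8 gradient-accumulation steps gives the effective batch of 16, and ceil(2,415 / 16) = 151 optimizer updates per epoch, times 3 epochs, yields the logged 453 total optimization steps. Second, the LoRA geometry: with the seven listed target modules on a 32-layer Mistral-7B (hidden 4096, intermediate 14336, 8 KV heads), each unit of rank costs 2,621,440 adapter parameters, so the logged 20,971,520 trainable parameters imply rank r = 8. A sketch of an equivalent PEFT setup; lora_alpha and dropout are guesses, since the log does not record them:

```python
# Hedged sketch of the LoRA setup implied by the log. target_modules is verbatim
# from "Found linear modules"; r=8 is inferred from the trainable-parameter count.
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,     # assumption, not in the log
    lora_dropout=0.0,  # assumption, not in the log
    target_modules=["q_proj", "v_proj", "k_proj", "gate_proj",
                    "up_proj", "o_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

# Upcasts the trainable parameters to float32, matching "Upcasting trainable
# params to float32" above; `model` is the 4-bit model loaded earlier.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# trainable params: 20,971,520 || all params: 7,587,719,168 || trainable%: 0.2764
```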
[INFO|2025-04-21 17:42:39] logging.py:157 >> {'loss': 10.8058, 'learning_rate': 1.9998e-04, 'epoch': 0.03, 'throughput': 350.83}
[INFO|2025-04-21 17:47:20] logging.py:157 >> {'loss': 2.5613, 'learning_rate': 1.9985e-04, 'epoch': 0.07, 'throughput': 350.51}
[INFO|2025-04-21 17:52:02] logging.py:157 >> {'loss': 1.4579, 'learning_rate': 1.9959e-04, 'epoch': 0.10, 'throughput': 350.51}
[INFO|2025-04-21 17:56:43] logging.py:157 >> {'loss': 1.1407, 'learning_rate': 1.9922e-04, 'epoch': 0.13, 'throughput': 350.55}
[INFO|2025-04-21 18:01:24] logging.py:157 >> {'loss': 0.7902, 'learning_rate': 1.9873e-04, 'epoch': 0.17, 'throughput': 350.53}
[INFO|2025-04-21 18:06:04] logging.py:157 >> {'loss': 0.9830, 'learning_rate': 1.9812e-04, 'epoch': 0.20, 'throughput': 350.53}
[INFO|2025-04-21 18:10:45] logging.py:157 >> {'loss': 0.9299, 'learning_rate': 1.9739e-04, 'epoch': 0.23, 'throughput': 350.57}
[INFO|2025-04-21 18:15:26] logging.py:157 >> {'loss': 0.6314, 'learning_rate': 1.9655e-04, 'epoch': 0.26, 'throughput': 350.66}
[INFO|2025-04-21 18:20:06] logging.py:157 >> {'loss': 0.8311, 'learning_rate': 1.9559e-04, 'epoch': 0.30, 'throughput': 350.69}
[INFO|2025-04-21 18:24:46] logging.py:157 >> {'loss': 0.5493, 'learning_rate': 1.9451e-04, 'epoch': 0.33, 'throughput': 350.73}
[INFO|2025-04-21 18:29:27] logging.py:157 >> {'loss': 0.4694, 'learning_rate': 1.9332e-04, 'epoch': 0.36, 'throughput': 350.78}
[INFO|2025-04-21 18:34:07] logging.py:157 >> {'loss': 0.5595, 'learning_rate': 1.9202e-04, 'epoch': 0.40, 'throughput': 350.83}
[INFO|2025-04-21 18:38:47] logging.py:157 >> {'loss': 0.2787, 'learning_rate': 1.9061e-04, 'epoch': 0.43, 'throughput': 350.84}
[INFO|2025-04-21 18:43:28] logging.py:157 >> {'loss': 0.5269, 'learning_rate': 1.8971e-04, 'epoch': 0.46, 'throughput': 350.83}
[INFO|2025-04-21 18:48:09] logging.py:157 >> {'loss': 0.7782, 'learning_rate': 1.8812e-04, 'epoch': 0.50, 'throughput': 350.82}
[INFO|2025-04-21 18:52:48] logging.py:157 >> {'loss': 0.5458, 'learning_rate': 1.8643e-04, 'epoch': 0.53, 'throughput': 350.83}
[INFO|2025-04-21 18:57:27] logging.py:157 >> {'loss': 0.3148, 'learning_rate': 1.8463e-04, 'epoch': 0.56, 'throughput': 350.84}
[INFO|2025-04-21 19:02:08] logging.py:157 >> {'loss': 0.3010, 'learning_rate': 1.8274e-04, 'epoch': 0.60, 'throughput': 350.83}
[INFO|2025-04-21 19:06:49] logging.py:157 >> {'loss': 0.6369, 'learning_rate': 1.8074e-04, 'epoch': 0.63, 'throughput': 350.82}
[INFO|2025-04-21 19:09:03] trainer.py:2657 >> Training completed. Do not forget to share your model on huggingface.co/models =)
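The loss falls from 10.8 to the 0.3-0.8 range within the first epoch, and the logged learning rates decay smoothly from the 2e-4 peak. The decay is roughly consistent with cosine annealing over the 453 optimization steps; this is an inference from the numbers, not something the log states, so the sketch below is a plausibility check rather than the run's actual scheduler:

```python
# Hedged check: cosine annealing from a 2e-4 peak to zero over 453 steps,
# no warmup assumed. The early logged values (1.9998e-04, 1.9985e-04, ...)
# line up with this curve to within a step or two of logging offset.
import math

PEAK_LR = 2e-4
TOTAL_STEPS = 453

def cosine_lr(step: int) -> float:
    return 0.5 * PEAK_LR * (1 + math.cos(math.pi * step / TOTAL_STEPS))

for step in (3, 8, 150, 453):
    print(f"step {step:3d}: lr = {cosine_lr(step):.4e}")
```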
[INFO|2025-04-21 19:09:03] image_processing_base.py:261 >> Image processor saved in saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28/preprocessor_config.json
[INFO|2025-04-21 19:09:03] tokenization_utils_base.py:2500 >> tokenizer config file saved in saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28/tokenizer_config.json
[INFO|2025-04-21 19:09:03] tokenization_utils_base.py:2509 >> Special tokens file saved in saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28/special_tokens_map.json
[INFO|2025-04-21 19:09:03] processing_utils.py:638 >> chat template saved in saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28/chat_template.json
[INFO|2025-04-21 19:09:03] processing_utils.py:644 >> processor saved in saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28/processor_config.json
[INFO|2025-04-21 19:09:03] trainer.py:3942 >> Saving model checkpoint to saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28
[INFO|2025-04-21 19:09:03] configuration_utils.py:699 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--llava-hf--llava-v1.6-mistral-7b-hf/snapshots/144bfb964d4eef1502a22af4c5ff20d0d4a94cc1/config.json
[INFO|2025-04-21 19:09:03] configuration_utils.py:771 >> Model config LlavaNextConfig {
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [336, 672],
    [672, 336],
    [672, 672],
    [1008, 336],
    [336, 1008]
  ],
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava_next",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
    "architectures": [
      "MistralForCausalLM"
    ],
    "intermediate_size": 14336,
    "max_position_embeddings": 32768,
    "model_type": "mistral",
    "num_key_value_heads": 8,
    "rms_norm_eps": 1e-05,
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "torch_dtype": "bfloat16",
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.49.0",
  "use_image_newline_parameter": true,
  "vision_config": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}
[INFO|2025-04-21 19:09:04] tokenization_utils_base.py:2500 >> tokenizer config file saved in saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28/tokenizer_config.json
[INFO|2025-04-21 19:09:04] tokenization_utils_base.py:2509 >> Special tokens file saved in saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28/special_tokens_map.json
[WARNING|2025-04-21 19:09:04] logging.py:162 >> No metric eval_loss to plot.
[WARNING|2025-04-21 19:09:04] logging.py:162 >> No metric eval_accuracy to plot.
[INFO|2025-04-21 19:09:04] modelcard.py:449 >> Dropping the following result as it does not have all the necessary fields: {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
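The run leaves a self-contained adapter directory: LoRA weights plus the tokenizer, processor, and chat template saved above. A sketch of reloading it for inference; the save path is copied from the log, while the prompt format and generation settings are illustrative:

```python
# Hedged sketch: reload the saved LoRA adapter on top of the base model.
import torch
from peft import PeftModel
from transformers import LlavaNextForConditionalGeneration, LlavaNextProcessor

adapter_dir = "saves/LLaVA-NeXT-Mistral-7B-Chat/lora/train_2025-04-21-17-35-28"

base = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-mistral-7b-hf",
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, adapter_dir)
processor = LlavaNextProcessor.from_pretrained(adapter_dir)

# Text-only query in Mistral instruct format (illustrative, dataset-style).
prompt = "[INST] Show me all running containers. [/INST]"
inputs = processor(text=prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(processor.batch_decode(output, skip_special_tokens=True)[0])
```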