---
base_model: unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit
tags:
- text-generation-inference
- transformers
- unsloth
- llama
- trl
- sft
language:
- en
---

# Uploaded model

- **Developed by:** mervinpraison
- **Finetuned from model:** unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit
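
To try the uploaded checkpoint (`mervinpraison/llama3.2-3B-harupfall-axis`, the `hf_model_name` in the training configuration below), a minimal `transformers` sketch follows. It assumes the repository holds a merged causal-LM checkpoint; if it only contains LoRA adapters, load them with `peft` instead. The prompt is a placeholder, not an example from the dataset.

```python
# Illustrative inference sketch (assumption: the repo holds a merged
# checkpoint, not bare LoRA adapters).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mervinpraison/llama3.2-3B-harupfall-axis"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# Placeholder prompt; use input formatted like the fall-detection training data.
messages = [{"role": "user", "content": "Classify this accelerometer reading."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```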

Training configuration:

```yaml
dataset:
- name: mervinpraison/harup-fall-axis-alpaca
dataset_num_proc: 2
dataset_text_field: text
gradient_accumulation_steps: 2
hf_model_name: mervinpraison/llama3.2-3B-harupfall-axis
huggingface_save: 'true'
learning_rate: 0.0001
load_in_4bit: true
loftq_config: null
logging_steps: 15
lora_alpha: 16
lora_bias: none
lora_dropout: 0
lora_r: 16
lora_target_modules:
- q_proj
- k_proj
- v_proj
- o_proj
- gate_proj
- up_proj
- down_proj
lr_scheduler_type: linear
max_seq_length: 2048
max_steps: 6000
model_name: unsloth/Llama-3.2-3B-Instruct-bnb-4bit
model_parameters: 3b
num_train_epochs: 10
ollama_model: mervinpraison/llama3.2-3B-harupfall-axis
ollama_save: 'true'
optim: lion_8bit
output_dir: outputs
packing: false
per_device_train_batch_size: 1
quantization_method:
- q4_k_m
random_state: 3407
seed: 3407
train: 'true'
use_gradient_checkpointing: unsloth
use_rslora: false
warmup_steps: 100
weight_decay: 0.05
```
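
For reference, here is a hedged sketch of how these settings map onto the usual Unsloth + TRL SFT pattern. This is a reconstruction for illustration, not the exact training script, and it follows the classic `SFTTrainer` keyword signature used in Unsloth examples:

```python
# Reconstruction of the training run from the YAML above (illustrative
# sketch; not the original script). Comments name the source config keys.
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",  # model_name
    max_seq_length=2048,                                  # max_seq_length
    load_in_4bit=True,                                    # load_in_4bit
)

model = FastLanguageModel.get_peft_model(
    model,
    r=16,           # lora_r
    lora_alpha=16,  # lora_alpha
    lora_dropout=0, # lora_dropout
    bias="none",    # lora_bias
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    use_gradient_checkpointing="unsloth",
    use_rslora=False,
    loftq_config=None,
    random_state=3407,
)

dataset = load_dataset("mervinpraison/harup-fall-axis-alpaca", split="train")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=2,
        learning_rate=1e-4,
        lr_scheduler_type="linear",
        warmup_steps=100,
        max_steps=6000,        # takes precedence over num_train_epochs
        num_train_epochs=10,
        optim="lion_8bit",
        weight_decay=0.05,
        logging_steps=15,
        seed=3407,
        output_dir="outputs",
    ),
)
trainer.train()
```

Per `ollama_save` and `quantization_method`, a `q4_k_m` GGUF export was also produced; if it was pushed under the `ollama_model` name, it would presumably run via `ollama run mervinpraison/llama3.2-3B-harupfall-axis`.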

Training details: [Weights & Biases run](https://wandb.ai/praisonresearch/praisonai-fall/runs/ghzw8mi2)