# Run configuration: dataloader settings, optimizer / LR-scheduler settings,
# two model/tokenizer constants, and the dataset locations.
#
# NOTE(security): the `!!python/object:` tags below make PyYAML construct
# project classes while loading. `yaml.safe_load` rejects these tags, so this
# file needs a non-safe loader and the tagged modules must be importable.
# Only load it from a trusted source.
#
# NOTE(review): nesting was reconstructed from the alphabetical key order and
# the `*_name` / `*_kwargs` pairing; confirm the placement of `optim_name`,
# `scheduler_name` and `weight_decay` against the config class definitions.

dataloader_config: !!python/object:src.datamodule.DataloaderConfig
  batch_size: 16
  drop_last: false
  eval_batch_size: 128
  multiprocessing_context: null
  num_workers: 12
  persistent_workers: false
  pin_memory: true
  prefetch_factor: 2
  shuffle: true

eod_token_id: 0
max_position_embeddings: 2048

# NOTE(review): `OptimCofig` looks like a misspelling of `OptimConfig`, but the
# tag has to match the class name that actually exists in `src.module`; rename
# it here only together with the class itself.
optim_config: !!python/object:src.module.OptimCofig
  keller_kwargs: {}
  lr: 0.0006
  num_warmup_steps: 2000
  optim_kwargs:
    betas:
    - 0.9
    - 0.95
    eps: 1.0e-08
    fused: true
  optim_name: adamw
  # The step counts in this section add up to 2000 + 46000 + 2000 = 50000,
  # presumably the total schedule length if the phases run back to back;
  # verify against the scheduler implementation.
  scheduler_kwargs:
    min_lr_ratio: 0.01
    num_decay_steps: 2000
    num_stable_steps: 46000
  scheduler_name: warmup_stable_decay
  weight_decay: 0.1

# Absolute, machine-specific dataset paths; adjust when running elsewhere.
train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation