pietrolesci committed on
Commit
b2a8252
·
verified ·
1 Parent(s): 3ae021b

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Experiment Configuration
2
+ ```yaml
3
+ callbacks:
4
+ grad_accum:
5
+ _target_: src.callbacks.gradient_accumulation.GradientAccumulationScheduler
6
+ scheduling:
7
+ 0: 2
8
+ grad_norm:
9
+ _target_: src.callbacks.grad_norm.GradNorm
10
+ check_clipping: false
11
+ group_separator: /
12
+ histogram_freq: null
13
+ log_weight_distribution: false
14
+ norm_type: 2
15
+ only_total: true
16
+ lr_monitor:
17
+ _target_: src.callbacks.lr_monitor.SimpleLearningRateMonitor
18
+ model_checkpoint:
19
+ _target_: src.callbacks.model_checkpoint.ModelCheckpoint
20
+ dirpath: .checkpoints
21
+ enable_version_counter: false
22
+ every_n_train_steps: 2000
23
+ filename: '{step}'
24
+ save_initial_checkpoint: true
25
+ save_last: link
26
+ save_top_k: -1
27
+ verbose: true
28
+ speed_monitor:
29
+ _target_: src.callbacks.speed_monitor.SpeedMonitor
30
+ data:
31
+ batch_size: 16
32
+ drop_last: false
33
+ eval_batch_size: 64
34
+ multiprocessing_context: null
35
+ num_workers: 12
36
+ persistent_workers: false
37
+ pin_memory: true
38
+ prefetch_factor: 2
39
+ shuffle: true
40
+ dataset: minipile
41
+ loggers:
42
+ tensorboard:
43
+ _target_: src.loggers.TensorBoardLogger
44
+ name: ''
45
+ save_dir: ./
46
+ version: null
47
+ model: smol_llama-1B
48
+ optim:
49
+ lr: 0.0006
50
+ num_warmup_steps: 2000
51
+ optim_kwargs:
52
+ betas:
53
+ - 0.9
54
+ - 0.95
55
+ eps: 1.0e-08
56
+ fused: true
57
+ optim_name: adamw
58
+ scheduler_kwargs:
59
+ min_lr_ratio: 0.01
60
+ num_decay_steps: 2000
61
+ num_stable_steps: 46000
62
+ scheduler_name: warmup_stable_decay
63
+ weight_decay: 0.1
64
+ out_parent_folder: model_train
65
+ pwd: /home/pl487/rds/hpc-work/rdd
66
+ resume_from_checkpoint: .checkpoints/last.ckpt
67
+ run_folder: .
68
+ save_initial_checkpoint: true
69
+ seed: 42
70
+ tok_name: bpe32000minipile
71
+ tok_path: /home/pl487/rds/hpc-work/rdd/outputs/tokenizers/bpe32000minipile
72
+ torch_compile: true
73
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
74
+ trainer:
75
+ accelerator: gpu
76
+ deterministic: false
77
+ devices: 4
78
+ enable_progress_bar: true
79
+ fast_dev_run: false
80
+ gradient_clip_algorithm: norm
81
+ gradient_clip_val: 1.0
82
+ limit_val_batches: 500
83
+ log_every_n_steps: 1
84
+ max_steps: 50000
85
+ precision: bf16-true
86
+ val_check_interval: 2000
87
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
88
+ ```
hparams.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ loggers:
2
+ tensorboard:
3
+ _target_: src.loggers.TensorBoardLogger
4
+ save_dir: ./
5
+ name: ''
6
+ version: null
7
+ callbacks:
8
+ lr_monitor:
9
+ _target_: src.callbacks.lr_monitor.SimpleLearningRateMonitor
10
+ grad_norm:
11
+ _target_: src.callbacks.grad_norm.GradNorm
12
+ norm_type: 2
13
+ group_separator: /
14
+ histogram_freq: null
15
+ check_clipping: false
16
+ log_weight_distribution: false
17
+ only_total: true
18
+ speed_monitor:
19
+ _target_: src.callbacks.speed_monitor.SpeedMonitor
20
+ grad_accum:
21
+ _target_: src.callbacks.gradient_accumulation.GradientAccumulationScheduler
22
+ scheduling:
23
+ 0: 2
24
+ model_checkpoint:
25
+ _target_: src.callbacks.model_checkpoint.ModelCheckpoint
26
+ dirpath: .checkpoints
27
+ filename: '{step}'
28
+ enable_version_counter: false
29
+ every_n_train_steps: 2000
30
+ save_top_k: -1
31
+ save_last: link
32
+ verbose: true
33
+ save_initial_checkpoint: true
34
+ tok_path: /home/pl487/rds/hpc-work/rdd/outputs/tokenizers/bpe32000minipile
35
+ run_folder: .
36
+ out_parent_folder: model_train
37
+ tok_name: bpe32000minipile
38
+ dataset: minipile
39
+ pwd: /home/pl487/rds/hpc-work/rdd
40
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
41
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
42
+ model: smol_llama-1B
43
+ resume_from_checkpoint: .checkpoints/last.ckpt
44
+ save_initial_checkpoint: true
45
+ seed: 42
46
+ torch_compile: true
47
+ data:
48
+ batch_size: 16
49
+ eval_batch_size: 64
50
+ shuffle: true
51
+ drop_last: false
52
+ num_workers: 12
53
+ pin_memory: true
54
+ persistent_workers: false
55
+ prefetch_factor: 2
56
+ multiprocessing_context: null
57
+ optim:
58
+ optim_name: adamw
59
+ lr: 0.0006
60
+ weight_decay: 0.1
61
+ optim_kwargs:
62
+ fused: true
63
+ eps: 1.0e-08
64
+ betas:
65
+ - 0.9
66
+ - 0.95
67
+ scheduler_name: warmup_stable_decay
68
+ num_warmup_steps: 2000
69
+ scheduler_kwargs:
70
+ num_stable_steps: 46000
71
+ num_decay_steps: 2000
72
+ min_lr_ratio: 0.01
73
+ trainer:
74
+ accelerator: gpu
75
+ devices: 4
76
+ precision: bf16-true
77
+ deterministic: false
78
+ log_every_n_steps: 1
79
+ enable_progress_bar: true
80
+ fast_dev_run: false
81
+ gradient_clip_val: 1.0
82
+ gradient_clip_algorithm: norm
83
+ val_check_interval: 2000
84
+ max_steps: 50000
85
+ limit_val_batches: 500
version_0/events.out.tfevents.1739558750.gpu-q-76.214300.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59889d0ed2234e5a4e75cec9cae2b2e120151381bd796786bfeb213ef18a8f19
3
+ size 4855202
version_1/events.out.tfevents.1740418999.gpu-q-27.926781.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9fbbd1b6dfd0972d21cf05ce2672eefbcc4db22648415713de0949c79f9f8a3
3
+ size 3622
version_1/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 128
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
version_2/events.out.tfevents.1740422550.gpu-q-2.1283101.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e56ea76f0bad9a23a3f685c5c0eb06886c131de47ce0071eddb5fa6dcf549523
3
+ size 2127609
version_2/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 128
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
version_3/events.out.tfevents.1740440925.gpu-q-55.48783.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be3647eadd08b99c69cd19f1bc56490804d59fb53193add1b1738f46a03099c
3
+ size 4818038
version_3/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 64
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
version_4/events.out.tfevents.1740519086.gpu-q-73.978088.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:173f66866c09fca0a645b8a5e2b21daf379a4c670dd23dcb22a0a1903b7f149a
3
+ size 599200
version_4/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 64
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation