Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- config.json +65 -0
- global_step213995/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
- global_step213995/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
trainer_state.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/fs/archive/share/yulan/data/aa_mini/output/miniyulan-2B-final-stage22/checkpoint-208000",
|
3 |
+
"architectures": [
|
4 |
+
"MiniYuLanModelForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": true,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"dim_model_base": 1920,
|
10 |
+
"dim_model_base_attn": 64,
|
11 |
+
"dim_model_base_init": null,
|
12 |
+
"dim_model_base_lmh": 1,
|
13 |
+
"dim_model_base_logits": 1920.0,
|
14 |
+
"dim_model_base_lr": 256.0,
|
15 |
+
"down_proj_alpha": 0.03450327796711771,
|
16 |
+
"embed_tokens_alpha": 1,
|
17 |
+
"embedding_ln": false,
|
18 |
+
"embedding_rmsln": false,
|
19 |
+
"eos_token_id": 2,
|
20 |
+
"gate_up_proj_alpha": 0.3651483716701107,
|
21 |
+
"gradient_checkpointing_step": 11,
|
22 |
+
"hidden_act": "silu",
|
23 |
+
"hidden_size": 1920,
|
24 |
+
"hidden_states_shrink": 0.18708286933869706,
|
25 |
+
"init_scale_o": 1,
|
26 |
+
"initializer_range": 5e-05,
|
27 |
+
"input_layernorm_alpha": 1.0,
|
28 |
+
"intermediate_size": 4800,
|
29 |
+
"k_proj_alpha": 0.3651483716701107,
|
30 |
+
"layer_norm_eps": 1e-06,
|
31 |
+
"lm_head_alpha": 1.0,
|
32 |
+
"ln_scale": 1,
|
33 |
+
"max_position_embeddings": 4096,
|
34 |
+
"model_reproduce": "transformer",
|
35 |
+
"model_type": "miniyulan",
|
36 |
+
"norm_alpha": 1.0,
|
37 |
+
"num_attention_heads": 30,
|
38 |
+
"num_epochs_trained_before_this_epoch": 21,
|
39 |
+
"num_hidden_layers": 56,
|
40 |
+
"num_key_value_heads": 6,
|
41 |
+
"num_steps_trained_before_this_epoch": 204262,
|
42 |
+
"o_proj_alpha": 0.03450327796711771,
|
43 |
+
"post_attention_layernorm_alpha": 1.0,
|
44 |
+
"q_proj_alpha": 0.3651483716701107,
|
45 |
+
"qk_layernorm": false,
|
46 |
+
"rms_norm_eps": 1e-06,
|
47 |
+
"rms_type": "llama",
|
48 |
+
"rope_scaling": null,
|
49 |
+
"rope_theta": 10000.0,
|
50 |
+
"scale_emb": 10.0,
|
51 |
+
"shrink_alpha": 1,
|
52 |
+
"sliding_window": null,
|
53 |
+
"tie_word_embeddings": true,
|
54 |
+
"torch_dtype": "bfloat16",
|
55 |
+
"transformers_version": "4.44.0",
|
56 |
+
"use_cache": false,
|
57 |
+
"use_emb_alpha": true,
|
58 |
+
"use_liger": true,
|
59 |
+
"use_norm_alpha": true,
|
60 |
+
"use_sliding_window": false,
|
61 |
+
"v_proj_alpha": 0.3651483716701107,
|
62 |
+
"vocab_size": 99000,
|
63 |
+
"wesar_weights": true,
|
64 |
+
"z_loss": 0.0001
|
65 |
+
}
|
global_step213995/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74b94e83365cd52e98f8231435369606b04c99a3c894ffcdd9a553824465edf1
|
3 |
+
size 558554482
|
global_step213995/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ab4739412441d04ee9bd54f6e85dbad55ba865d23dcedd796375faaee26315d
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07ae42eba7bd7a93135524c68d85bd3bdb648dfd0be56f0804c9d8d91a7d9653
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cceb27144dfaa0ab003e60aebef0f18a492b7a90a6d6f42f0369011ad54ffb4e
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb62262bed06308ee8f3428a74273b482b9eb959042f11495c2741c6b273a5a9
|
3 |
+
size 558554434
|
global_step213995/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c24d9ecf7ddff5fc3ed5b7ac7a15c82de871eff2ce2b14c48791140722565b86
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54021369728e334ed588f373a493ea601c31245560f92b7f0968fa082a803670
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:671020930570f0ab5caec5915200b84a5b01e4ee5218a52b15d83d371f345667
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1939d58ce858e986c7d13bf5d881f43c1a076f5af538ab79012ec9c114f15d07
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c88e4114b45606422618162eccc4523a321430d98136342043573acf1a24119f
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f5d1ee79673a738df1c2e60b4b133013f82ef5713d753f73ad7b7dc97daf72d
|
3 |
+
size 558554434
|
global_step213995/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9cd96a7b7fd6be7841215c313e5db990e6e9207b6311d672775e2d7c49f1bf7
|
3 |
+
size 558554418
|
global_step213995/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f9ea307e6406108a1fb1dbe3aa6114d481716889a2b3fe932ce80b7dba81698
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6228afeb4e8a150564c2f75c856c9f38772880608b76d0b2da0710c07c3cbb16
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59a8277348fce552e4baee72b00eb72763cb2337b111b4afca7815b6edf9bed9
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfd3d201d01ce8f3668c21dfd44bb721cc37d601c7a77c1c07830b07d8a1d715
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05ab2a0362e0eab025599516a92e699bded4568787e83a5ff173fcaf93dfaff7
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e4d20006b2f0be7d8bff38668b63414674d0c0538633ab9037ebfc8e128f069
|
3 |
+
size 558554434
|
global_step213995/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12d1343b3c02ca97c1b638c7ac92e007ad935845853aa768a983b2b092169df4
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c05dc437ed63f9415244f877da4a3b1b378f4b09715b090f63c657bf1b7cf950
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c60bfcafbf9989a63c6ba77e937802c7fc85016d2431fc4a6af474a856c6b186
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21ad9d57622914e0ed062ca54787d74f98a50da348db7785614306f52354b5b2
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2c4dee81e101999e365693379f0478d319f738b333e2f9a3506df94588978d6
|
3 |
+
size 558554290
|
global_step213995/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c9d30cb6f53561f46013ebfcbf3db9fb63adece413e290ccaea804b82825198
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3176d662f9be9f0d026a81129a92d42908643f2783aadc053b6b4f47cbad5d2
|
3 |
+
size 558554434
|
global_step213995/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e25a5c2ecf46e765225aa29aa62b560da0971ca05d51a119d738ad3ff744779f
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e0dd243ece298d89ce7c08e3adb921c0fcd27cdf2f98dbf95295440d663e69b
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:438774c5cc329400d922451069fa73f9b6fd569f79790cb913a2e6d564e94e30
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ea4eda41941731914385e17318ddd22d0e1454491f2b6807712ae4262130a9d
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da8e44f267d3e9af9b214560907799f5c4b334a44819779aefc8aecf83a54ae9
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:136519f6403cfacaee470f321c4f420e74a5c02b9ddd3b539853535ae5c116af
|
3 |
+
size 558554434
|
global_step213995/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ce79e9b71edf12a0193c6296cb44825cf6722b17b35fe8dc2d7b85ba0636405
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cff4e01a7b84582c8c27c1977d2d79dde71f2d268ecfeab2561d6e994b062a7
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af2fe4ce24fe90521de683491cb1c0dcf5ad5397b918bc1d4dada50374c4e9e2
|
3 |
+
size 558554354
|
global_step213995/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8dfcacb9505b719309e3c27f10e2a694388faaf902026f774bb35c6f4628e740
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84b0f1de8fad0ef4e89af8546269354c463908469cc677cedab2f66c44c811e3
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db883b91ca3e9bb6cb7c969dbcc81e848f13d4680a3b7bf611bcde55e44c6f95
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfcd756df2e1bffc89229603f889fad0a924e11d2b5fa73cae9e7915dce18771
|
3 |
+
size 558554434
|
global_step213995/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7532fb2eca9b1e2a85329395c4738bf5edf70939926618c94c2276e3cd77430
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16440ac742e755be27bcfc5adfec6383729c65ea766e81a523f0c396837b589e
|
3 |
+
size 558554370
|
global_step213995/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f54b8824108ccd99a6ed0b1a3a9f31f14b0d5c76230a57cf02f9bcb82d035fb8
|
3 |
+
size 558554306
|
global_step213995/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16460d18be6149b3c84b30368e0c412fd9e2cc8de37dcbc8511d074abf53ba66
|
3 |
+
size 558610626
|
global_step213995/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5cc5d009442aaac8c9e5cad9441a7a49e81d79f9f3dce5e31dc6f44c099e767
|
3 |
+
size 558554290
|
global_step213995/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40fd4b4a1771e1836e26ed0f1efce6a34168cef518fed108af77dfc55430707a
|
3 |
+
size 558554290
|
global_step213995/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d63934f9358ddd272dbd1ba0fafed925a39a744d4da269087d959e8a22d7b52
|
3 |
+
size 558554354
|
global_step213995/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ff077e49daf89de4ca718be18894d8d53101ec08fcb008340a12527e832033e
|
3 |
+
size 558554418
|
global_step213995/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d379af4b67216c28f57c0bd81ade28c687638c423647a73972c197bdd807995d
|
3 |
+
size 558554290
|
global_step213995/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e851b6214f90e00c98271b0d63b0386370fa6a1b1b723e3dd30d51b09525b08b
|
3 |
+
size 558554354
|