{'dim': 8192, 'n_layers': 80, 'n_heads': 64, 'n_kv_heads': 8, 'vocab_size': 128256, 'ffn_dim_multiplier': 1.3, 'multiple_of': 4096, 'norm_eps': 1e-05, 'rope_theta': 500000.0, 'use_scaled_rope': True}
000: original/consolidated.00.pth
001: original/consolidated.01.pth
002: original/consolidated.02.pth
003: original/consolidated.03.pth
004: original/consolidated.04.pth
005: original/consolidated.05.pth
006: original/consolidated.06.pth
007: original/consolidated.07.pth

-----------------------------------------------------------------------------
0 params in total.
0 bytes in total.
-----------------------------------------------------------------------------

[1/8]: Loading original/consolidated.00.pth
   0 :  131334144 : tok_embeddings.weight               : [16032, 8192]   : torch.bfloat16
   1 :    8388608 : layers.0.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
   2 :    1048576 : layers.0.attention.wk.weight        : [128, 8192]     : torch.bfloat16
   3 :    1048576 : layers.0.attention.wv.weight        : [128, 8192]     : torch.bfloat16
   4 :    8388608 : layers.0.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
   5 :   29360128 : layers.0.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
   6 :   29360128 : layers.0.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
   7 :   29360128 : layers.0.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
   8 :       8192 : layers.0.attention_norm.weight      : [8192]          : torch.bfloat16
   9 :       8192 : layers.0.ffn_norm.weight            : [8192]          : torch.bfloat16
  10 :    8388608 : layers.1.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  11 :    1048576 : layers.1.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  12 :    1048576 : layers.1.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  13 :    8388608 : layers.1.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  14 :   29360128 : layers.1.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  15 :   29360128 : layers.1.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  16 :   29360128 : layers.1.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  17 :       8192 : layers.1.attention_norm.weight      : [8192]          : torch.bfloat16
  18 :       8192 : layers.1.ffn_norm.weight            : [8192]          : torch.bfloat16
  19 :    8388608 : layers.2.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  20 :    1048576 : layers.2.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  21 :    1048576 : layers.2.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  22 :    8388608 : layers.2.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  23 :   29360128 : layers.2.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  24 :   29360128 : layers.2.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  25 :   29360128 : layers.2.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  26 :       8192 : layers.2.attention_norm.weight      : [8192]          : torch.bfloat16
  27 :       8192 : layers.2.ffn_norm.weight            : [8192]          : torch.bfloat16
  28 :    8388608 : layers.3.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  29 :    1048576 : layers.3.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  30 :    1048576 : layers.3.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  31 :    8388608 : layers.3.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  32 :   29360128 : layers.3.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  33 :   29360128 : layers.3.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  34 :   29360128 : layers.3.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  35 :       8192 : layers.3.attention_norm.weight      : [8192]          : torch.bfloat16
  36 :       8192 : layers.3.ffn_norm.weight            : [8192]          : torch.bfloat16
  37 :    8388608 : layers.4.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  38 :    1048576 : layers.4.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  39 :    1048576 : layers.4.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  40 :    8388608 : layers.4.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  41 :   29360128 : layers.4.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  42 :   29360128 : layers.4.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  43 :   29360128 : layers.4.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  44 :       8192 : layers.4.attention_norm.weight      : [8192]          : torch.bfloat16
  45 :       8192 : layers.4.ffn_norm.weight            : [8192]          : torch.bfloat16
  46 :    8388608 : layers.5.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  47 :    1048576 : layers.5.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  48 :    1048576 : layers.5.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  49 :    8388608 : layers.5.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  50 :   29360128 : layers.5.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  51 :   29360128 : layers.5.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  52 :   29360128 : layers.5.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  53 :       8192 : layers.5.attention_norm.weight      : [8192]          : torch.bfloat16
  54 :       8192 : layers.5.ffn_norm.weight            : [8192]          : torch.bfloat16
  55 :    8388608 : layers.6.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  56 :    1048576 : layers.6.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  57 :    1048576 : layers.6.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  58 :    8388608 : layers.6.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  59 :   29360128 : layers.6.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  60 :   29360128 : layers.6.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  61 :   29360128 : layers.6.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  62 :       8192 : layers.6.attention_norm.weight      : [8192]          : torch.bfloat16
  63 :       8192 : layers.6.ffn_norm.weight            : [8192]          : torch.bfloat16
  64 :    8388608 : layers.7.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  65 :    1048576 : layers.7.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  66 :    1048576 : layers.7.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  67 :    8388608 : layers.7.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  68 :   29360128 : layers.7.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  69 :   29360128 : layers.7.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  70 :   29360128 : layers.7.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  71 :       8192 : layers.7.attention_norm.weight      : [8192]          : torch.bfloat16
  72 :       8192 : layers.7.ffn_norm.weight            : [8192]          : torch.bfloat16
  73 :    8388608 : layers.8.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  74 :    1048576 : layers.8.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  75 :    1048576 : layers.8.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  76 :    8388608 : layers.8.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  77 :   29360128 : layers.8.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  78 :   29360128 : layers.8.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  79 :   29360128 : layers.8.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  80 :       8192 : layers.8.attention_norm.weight      : [8192]          : torch.bfloat16
  81 :       8192 : layers.8.ffn_norm.weight            : [8192]          : torch.bfloat16
  82 :    8388608 : layers.9.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  83 :    1048576 : layers.9.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  84 :    1048576 : layers.9.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  85 :    8388608 : layers.9.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  86 :   29360128 : layers.9.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  87 :   29360128 : layers.9.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  88 :   29360128 : layers.9.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  89 :       8192 : layers.9.attention_norm.weight      : [8192]          : torch.bfloat16
  90 :       8192 : layers.9.ffn_norm.weight            : [8192]          : torch.bfloat16
  91 :    8388608 : layers.10.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
  92 :    1048576 : layers.10.attention.wk.weight       : [128, 8192]     : torch.bfloat16
  93 :    1048576 : layers.10.attention.wv.weight       : [128, 8192]     : torch.bfloat16
  94 :    8388608 : layers.10.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
  95 :   29360128 : layers.10.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
  96 :   29360128 : layers.10.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
  97 :   29360128 : layers.10.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
  98 :       8192 : layers.10.attention_norm.weight     : [8192]          : torch.bfloat16
  99 :       8192 : layers.10.ffn_norm.weight           : [8192]          : torch.bfloat16
 100 :    8388608 : layers.11.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 101 :    1048576 : layers.11.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 102 :    1048576 : layers.11.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 103 :    8388608 : layers.11.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 104 :   29360128 : layers.11.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 105 :   29360128 : layers.11.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 106 :   29360128 : layers.11.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 107 :       8192 : layers.11.attention_norm.weight     : [8192]          : torch.bfloat16
 108 :       8192 : layers.11.ffn_norm.weight           : [8192]          : torch.bfloat16
 109 :    8388608 : layers.12.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 110 :    1048576 : layers.12.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 111 :    1048576 : layers.12.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 112 :    8388608 : layers.12.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 113 :   29360128 : layers.12.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 114 :   29360128 : layers.12.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 115 :   29360128 : layers.12.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 116 :       8192 : layers.12.attention_norm.weight     : [8192]          : torch.bfloat16
 117 :       8192 : layers.12.ffn_norm.weight           : [8192]          : torch.bfloat16
 118 :    8388608 : layers.13.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 119 :    1048576 : layers.13.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 120 :    1048576 : layers.13.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 121 :    8388608 : layers.13.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 122 :   29360128 : layers.13.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 123 :   29360128 : layers.13.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 124 :   29360128 : layers.13.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 125 :       8192 : layers.13.attention_norm.weight     : [8192]          : torch.bfloat16
 126 :       8192 : layers.13.ffn_norm.weight           : [8192]          : torch.bfloat16
 127 :    8388608 : layers.14.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 128 :    1048576 : layers.14.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 129 :    1048576 : layers.14.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 130 :    8388608 : layers.14.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 131 :   29360128 : layers.14.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 132 :   29360128 : layers.14.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 133 :   29360128 : layers.14.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 134 :       8192 : layers.14.attention_norm.weight     : [8192]          : torch.bfloat16
 135 :       8192 : layers.14.ffn_norm.weight           : [8192]          : torch.bfloat16
 136 :    8388608 : layers.15.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 137 :    1048576 : layers.15.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 138 :    1048576 : layers.15.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 139 :    8388608 : layers.15.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 140 :   29360128 : layers.15.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 141 :   29360128 : layers.15.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 142 :   29360128 : layers.15.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 143 :       8192 : layers.15.attention_norm.weight     : [8192]          : torch.bfloat16
 144 :       8192 : layers.15.ffn_norm.weight           : [8192]          : torch.bfloat16
 145 :    8388608 : layers.16.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 146 :    1048576 : layers.16.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 147 :    1048576 : layers.16.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 148 :    8388608 : layers.16.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 149 :   29360128 : layers.16.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 150 :   29360128 : layers.16.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 151 :   29360128 : layers.16.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 152 :       8192 : layers.16.attention_norm.weight     : [8192]          : torch.bfloat16
 153 :       8192 : layers.16.ffn_norm.weight           : [8192]          : torch.bfloat16
 154 :    8388608 : layers.17.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 155 :    1048576 : layers.17.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 156 :    1048576 : layers.17.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 157 :    8388608 : layers.17.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 158 :   29360128 : layers.17.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 159 :   29360128 : layers.17.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 160 :   29360128 : layers.17.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 161 :       8192 : layers.17.attention_norm.weight     : [8192]          : torch.bfloat16
 162 :       8192 : layers.17.ffn_norm.weight           : [8192]          : torch.bfloat16
 163 :    8388608 : layers.18.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 164 :    1048576 : layers.18.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 165 :    1048576 : layers.18.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 166 :    8388608 : layers.18.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 167 :   29360128 : layers.18.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 168 :   29360128 : layers.18.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 169 :   29360128 : layers.18.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 170 :       8192 : layers.18.attention_norm.weight     : [8192]          : torch.bfloat16
 171 :       8192 : layers.18.ffn_norm.weight           : [8192]          : torch.bfloat16
 172 :    8388608 : layers.19.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 173 :    1048576 : layers.19.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 174 :    1048576 : layers.19.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 175 :    8388608 : layers.19.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 176 :   29360128 : layers.19.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 177 :   29360128 : layers.19.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 178 :   29360128 : layers.19.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 179 :       8192 : layers.19.attention_norm.weight     : [8192]          : torch.bfloat16
 180 :       8192 : layers.19.ffn_norm.weight           : [8192]          : torch.bfloat16
 181 :    8388608 : layers.20.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 182 :    1048576 : layers.20.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 183 :    1048576 : layers.20.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 184 :    8388608 : layers.20.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 185 :   29360128 : layers.20.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 186 :   29360128 : layers.20.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 187 :   29360128 : layers.20.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 188 :       8192 : layers.20.attention_norm.weight     : [8192]          : torch.bfloat16
 189 :       8192 : layers.20.ffn_norm.weight           : [8192]          : torch.bfloat16
 190 :    8388608 : layers.21.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 191 :    1048576 : layers.21.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 192 :    1048576 : layers.21.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 193 :    8388608 : layers.21.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 194 :   29360128 : layers.21.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 195 :   29360128 : layers.21.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 196 :   29360128 : layers.21.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 197 :       8192 : layers.21.attention_norm.weight     : [8192]          : torch.bfloat16
 198 :       8192 : layers.21.ffn_norm.weight           : [8192]          : torch.bfloat16
 199 :    8388608 : layers.22.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 200 :    1048576 : layers.22.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 201 :    1048576 : layers.22.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 202 :    8388608 : layers.22.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 203 :   29360128 : layers.22.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 204 :   29360128 : layers.22.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 205 :   29360128 : layers.22.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 206 :       8192 : layers.22.attention_norm.weight     : [8192]          : torch.bfloat16
 207 :       8192 : layers.22.ffn_norm.weight           : [8192]          : torch.bfloat16
 208 :    8388608 : layers.23.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 209 :    1048576 : layers.23.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 210 :    1048576 : layers.23.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 211 :    8388608 : layers.23.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 212 :   29360128 : layers.23.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 213 :   29360128 : layers.23.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 214 :   29360128 : layers.23.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 215 :       8192 : layers.23.attention_norm.weight     : [8192]          : torch.bfloat16
 216 :       8192 : layers.23.ffn_norm.weight           : [8192]          : torch.bfloat16
 217 :    8388608 : layers.24.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 218 :    1048576 : layers.24.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 219 :    1048576 : layers.24.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 220 :    8388608 : layers.24.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 221 :   29360128 : layers.24.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 222 :   29360128 : layers.24.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 223 :   29360128 : layers.24.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 224 :       8192 : layers.24.attention_norm.weight     : [8192]          : torch.bfloat16
 225 :       8192 : layers.24.ffn_norm.weight           : [8192]          : torch.bfloat16
 226 :    8388608 : layers.25.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 227 :    1048576 : layers.25.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 228 :    1048576 : layers.25.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 229 :    8388608 : layers.25.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 230 :   29360128 : layers.25.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 231 :   29360128 : layers.25.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 232 :   29360128 : layers.25.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 233 :       8192 : layers.25.attention_norm.weight     : [8192]          : torch.bfloat16
 234 :       8192 : layers.25.ffn_norm.weight           : [8192]          : torch.bfloat16
 235 :    8388608 : layers.26.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 236 :    1048576 : layers.26.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 237 :    1048576 : layers.26.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 238 :    8388608 : layers.26.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 239 :   29360128 : layers.26.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 240 :   29360128 : layers.26.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 241 :   29360128 : layers.26.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 242 :       8192 : layers.26.attention_norm.weight     : [8192]          : torch.bfloat16
 243 :       8192 : layers.26.ffn_norm.weight           : [8192]          : torch.bfloat16
 244 :    8388608 : layers.27.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 245 :    1048576 : layers.27.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 246 :    1048576 : layers.27.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 247 :    8388608 : layers.27.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 248 :   29360128 : layers.27.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 249 :   29360128 : layers.27.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 250 :   29360128 : layers.27.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 251 :       8192 : layers.27.attention_norm.weight     : [8192]          : torch.bfloat16
 252 :       8192 : layers.27.ffn_norm.weight           : [8192]          : torch.bfloat16
 253 :    8388608 : layers.28.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 254 :    1048576 : layers.28.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 255 :    1048576 : layers.28.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 256 :    8388608 : layers.28.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 257 :   29360128 : layers.28.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 258 :   29360128 : layers.28.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 259 :   29360128 : layers.28.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 260 :       8192 : layers.28.attention_norm.weight     : [8192]          : torch.bfloat16
 261 :       8192 : layers.28.ffn_norm.weight           : [8192]          : torch.bfloat16
 262 :    8388608 : layers.29.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 263 :    1048576 : layers.29.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 264 :    1048576 : layers.29.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 265 :    8388608 : layers.29.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 266 :   29360128 : layers.29.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 267 :   29360128 : layers.29.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 268 :   29360128 : layers.29.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 269 :       8192 : layers.29.attention_norm.weight     : [8192]          : torch.bfloat16
 270 :       8192 : layers.29.ffn_norm.weight           : [8192]          : torch.bfloat16
 271 :    8388608 : layers.30.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 272 :    1048576 : layers.30.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 273 :    1048576 : layers.30.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 274 :    8388608 : layers.30.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 275 :   29360128 : layers.30.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 276 :   29360128 : layers.30.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 277 :   29360128 : layers.30.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 278 :       8192 : layers.30.attention_norm.weight     : [8192]          : torch.bfloat16
 279 :       8192 : layers.30.ffn_norm.weight           : [8192]          : torch.bfloat16
 280 :    8388608 : layers.31.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 281 :    1048576 : layers.31.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 282 :    1048576 : layers.31.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 283 :    8388608 : layers.31.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 284 :   29360128 : layers.31.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 285 :   29360128 : layers.31.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 286 :   29360128 : layers.31.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 287 :       8192 : layers.31.attention_norm.weight     : [8192]          : torch.bfloat16
 288 :       8192 : layers.31.ffn_norm.weight           : [8192]          : torch.bfloat16
 289 :    8388608 : layers.32.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 290 :    1048576 : layers.32.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 291 :    1048576 : layers.32.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 292 :    8388608 : layers.32.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 293 :   29360128 : layers.32.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 294 :   29360128 : layers.32.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 295 :   29360128 : layers.32.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 296 :       8192 : layers.32.attention_norm.weight     : [8192]          : torch.bfloat16
 297 :       8192 : layers.32.ffn_norm.weight           : [8192]          : torch.bfloat16
 298 :    8388608 : layers.33.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 299 :    1048576 : layers.33.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 300 :    1048576 : layers.33.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 301 :    8388608 : layers.33.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 302 :   29360128 : layers.33.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 303 :   29360128 : layers.33.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 304 :   29360128 : layers.33.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 305 :       8192 : layers.33.attention_norm.weight     : [8192]          : torch.bfloat16
 306 :       8192 : layers.33.ffn_norm.weight           : [8192]          : torch.bfloat16
 307 :    8388608 : layers.34.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 308 :    1048576 : layers.34.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 309 :    1048576 : layers.34.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 310 :    8388608 : layers.34.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 311 :   29360128 : layers.34.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 312 :   29360128 : layers.34.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 313 :   29360128 : layers.34.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 314 :       8192 : layers.34.attention_norm.weight     : [8192]          : torch.bfloat16
 315 :       8192 : layers.34.ffn_norm.weight           : [8192]          : torch.bfloat16
 316 :    8388608 : layers.35.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 317 :    1048576 : layers.35.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 318 :    1048576 : layers.35.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 319 :    8388608 : layers.35.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 320 :   29360128 : layers.35.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 321 :   29360128 : layers.35.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 322 :   29360128 : layers.35.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 323 :       8192 : layers.35.attention_norm.weight     : [8192]          : torch.bfloat16
 324 :       8192 : layers.35.ffn_norm.weight           : [8192]          : torch.bfloat16
 325 :    8388608 : layers.36.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 326 :    1048576 : layers.36.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 327 :    1048576 : layers.36.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 328 :    8388608 : layers.36.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 329 :   29360128 : layers.36.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 330 :   29360128 : layers.36.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 331 :   29360128 : layers.36.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 332 :       8192 : layers.36.attention_norm.weight     : [8192]          : torch.bfloat16
 333 :       8192 : layers.36.ffn_norm.weight           : [8192]          : torch.bfloat16
 334 :    8388608 : layers.37.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 335 :    1048576 : layers.37.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 336 :    1048576 : layers.37.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 337 :    8388608 : layers.37.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 338 :   29360128 : layers.37.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 339 :   29360128 : layers.37.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 340 :   29360128 : layers.37.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 341 :       8192 : layers.37.attention_norm.weight     : [8192]          : torch.bfloat16
 342 :       8192 : layers.37.ffn_norm.weight           : [8192]          : torch.bfloat16
 343 :    8388608 : layers.38.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 344 :    1048576 : layers.38.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 345 :    1048576 : layers.38.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 346 :    8388608 : layers.38.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 347 :   29360128 : layers.38.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 348 :   29360128 : layers.38.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 349 :   29360128 : layers.38.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 350 :       8192 : layers.38.attention_norm.weight     : [8192]          : torch.bfloat16
 351 :       8192 : layers.38.ffn_norm.weight           : [8192]          : torch.bfloat16
 352 :    8388608 : layers.39.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 353 :    1048576 : layers.39.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 354 :    1048576 : layers.39.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 355 :    8388608 : layers.39.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 356 :   29360128 : layers.39.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 357 :   29360128 : layers.39.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 358 :   29360128 : layers.39.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 359 :       8192 : layers.39.attention_norm.weight     : [8192]          : torch.bfloat16
 360 :       8192 : layers.39.ffn_norm.weight           : [8192]          : torch.bfloat16
 361 :    8388608 : layers.40.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 362 :    1048576 : layers.40.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 363 :    1048576 : layers.40.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 364 :    8388608 : layers.40.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 365 :   29360128 : layers.40.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 366 :   29360128 : layers.40.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 367 :   29360128 : layers.40.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 368 :       8192 : layers.40.attention_norm.weight     : [8192]          : torch.bfloat16
 369 :       8192 : layers.40.ffn_norm.weight           : [8192]          : torch.bfloat16
 370 :    8388608 : layers.41.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 371 :    1048576 : layers.41.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 372 :    1048576 : layers.41.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 373 :    8388608 : layers.41.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 374 :   29360128 : layers.41.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 375 :   29360128 : layers.41.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 376 :   29360128 : layers.41.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 377 :       8192 : layers.41.attention_norm.weight     : [8192]          : torch.bfloat16
 378 :       8192 : layers.41.ffn_norm.weight           : [8192]          : torch.bfloat16
 379 :    8388608 : layers.42.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 380 :    1048576 : layers.42.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 381 :    1048576 : layers.42.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 382 :    8388608 : layers.42.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 383 :   29360128 : layers.42.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 384 :   29360128 : layers.42.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 385 :   29360128 : layers.42.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 386 :       8192 : layers.42.attention_norm.weight     : [8192]          : torch.bfloat16
 387 :       8192 : layers.42.ffn_norm.weight           : [8192]          : torch.bfloat16
 388 :    8388608 : layers.43.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 389 :    1048576 : layers.43.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 390 :    1048576 : layers.43.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 391 :    8388608 : layers.43.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 392 :   29360128 : layers.43.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 393 :   29360128 : layers.43.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 394 :   29360128 : layers.43.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 395 :       8192 : layers.43.attention_norm.weight     : [8192]          : torch.bfloat16
 396 :       8192 : layers.43.ffn_norm.weight           : [8192]          : torch.bfloat16
 397 :    8388608 : layers.44.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 398 :    1048576 : layers.44.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 399 :    1048576 : layers.44.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 400 :    8388608 : layers.44.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 401 :   29360128 : layers.44.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 402 :   29360128 : layers.44.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 403 :   29360128 : layers.44.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 404 :       8192 : layers.44.attention_norm.weight     : [8192]          : torch.bfloat16
 405 :       8192 : layers.44.ffn_norm.weight           : [8192]          : torch.bfloat16
 406 :    8388608 : layers.45.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 407 :    1048576 : layers.45.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 408 :    1048576 : layers.45.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 409 :    8388608 : layers.45.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 410 :   29360128 : layers.45.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 411 :   29360128 : layers.45.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 412 :   29360128 : layers.45.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 413 :       8192 : layers.45.attention_norm.weight     : [8192]          : torch.bfloat16
 414 :       8192 : layers.45.ffn_norm.weight           : [8192]          : torch.bfloat16
 415 :    8388608 : layers.46.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 416 :    1048576 : layers.46.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 417 :    1048576 : layers.46.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 418 :    8388608 : layers.46.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 419 :   29360128 : layers.46.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 420 :   29360128 : layers.46.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 421 :   29360128 : layers.46.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 422 :       8192 : layers.46.attention_norm.weight     : [8192]          : torch.bfloat16
 423 :       8192 : layers.46.ffn_norm.weight           : [8192]          : torch.bfloat16
 424 :    8388608 : layers.47.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 425 :    1048576 : layers.47.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 426 :    1048576 : layers.47.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 427 :    8388608 : layers.47.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 428 :   29360128 : layers.47.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 429 :   29360128 : layers.47.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 430 :   29360128 : layers.47.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 431 :       8192 : layers.47.attention_norm.weight     : [8192]          : torch.bfloat16
 432 :       8192 : layers.47.ffn_norm.weight           : [8192]          : torch.bfloat16
 433 :    8388608 : layers.48.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 434 :    1048576 : layers.48.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 435 :    1048576 : layers.48.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 436 :    8388608 : layers.48.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 437 :   29360128 : layers.48.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 438 :   29360128 : layers.48.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 439 :   29360128 : layers.48.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 440 :       8192 : layers.48.attention_norm.weight     : [8192]          : torch.bfloat16
 441 :       8192 : layers.48.ffn_norm.weight           : [8192]          : torch.bfloat16
 442 :    8388608 : layers.49.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 443 :    1048576 : layers.49.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 444 :    1048576 : layers.49.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 445 :    8388608 : layers.49.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 446 :   29360128 : layers.49.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 447 :   29360128 : layers.49.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 448 :   29360128 : layers.49.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 449 :       8192 : layers.49.attention_norm.weight     : [8192]          : torch.bfloat16
 450 :       8192 : layers.49.ffn_norm.weight           : [8192]          : torch.bfloat16
 451 :    8388608 : layers.50.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 452 :    1048576 : layers.50.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 453 :    1048576 : layers.50.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 454 :    8388608 : layers.50.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 455 :   29360128 : layers.50.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 456 :   29360128 : layers.50.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 457 :   29360128 : layers.50.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 458 :       8192 : layers.50.attention_norm.weight     : [8192]          : torch.bfloat16
 459 :       8192 : layers.50.ffn_norm.weight           : [8192]          : torch.bfloat16
 460 :    8388608 : layers.51.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 461 :    1048576 : layers.51.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 462 :    1048576 : layers.51.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 463 :    8388608 : layers.51.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 464 :   29360128 : layers.51.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 465 :   29360128 : layers.51.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 466 :   29360128 : layers.51.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 467 :       8192 : layers.51.attention_norm.weight     : [8192]          : torch.bfloat16
 468 :       8192 : layers.51.ffn_norm.weight           : [8192]          : torch.bfloat16
 469 :    8388608 : layers.52.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 470 :    1048576 : layers.52.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 471 :    1048576 : layers.52.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 472 :    8388608 : layers.52.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 473 :   29360128 : layers.52.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 474 :   29360128 : layers.52.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 475 :   29360128 : layers.52.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 476 :       8192 : layers.52.attention_norm.weight     : [8192]          : torch.bfloat16
 477 :       8192 : layers.52.ffn_norm.weight           : [8192]          : torch.bfloat16
 478 :    8388608 : layers.53.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 479 :    1048576 : layers.53.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 480 :    1048576 : layers.53.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 481 :    8388608 : layers.53.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 482 :   29360128 : layers.53.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 483 :   29360128 : layers.53.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 484 :   29360128 : layers.53.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 485 :       8192 : layers.53.attention_norm.weight     : [8192]          : torch.bfloat16
 486 :       8192 : layers.53.ffn_norm.weight           : [8192]          : torch.bfloat16
 487 :    8388608 : layers.54.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 488 :    1048576 : layers.54.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 489 :    1048576 : layers.54.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 490 :    8388608 : layers.54.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 491 :   29360128 : layers.54.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 492 :   29360128 : layers.54.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 493 :   29360128 : layers.54.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 494 :       8192 : layers.54.attention_norm.weight     : [8192]          : torch.bfloat16
 495 :       8192 : layers.54.ffn_norm.weight           : [8192]          : torch.bfloat16
 496 :    8388608 : layers.55.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 497 :    1048576 : layers.55.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 498 :    1048576 : layers.55.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 499 :    8388608 : layers.55.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 500 :   29360128 : layers.55.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 501 :   29360128 : layers.55.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 502 :   29360128 : layers.55.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 503 :       8192 : layers.55.attention_norm.weight     : [8192]          : torch.bfloat16
 504 :       8192 : layers.55.ffn_norm.weight           : [8192]          : torch.bfloat16
 505 :    8388608 : layers.56.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 506 :    1048576 : layers.56.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 507 :    1048576 : layers.56.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 508 :    8388608 : layers.56.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 509 :   29360128 : layers.56.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 510 :   29360128 : layers.56.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 511 :   29360128 : layers.56.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 512 :       8192 : layers.56.attention_norm.weight     : [8192]          : torch.bfloat16
 513 :       8192 : layers.56.ffn_norm.weight           : [8192]          : torch.bfloat16
 514 :    8388608 : layers.57.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 515 :    1048576 : layers.57.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 516 :    1048576 : layers.57.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 517 :    8388608 : layers.57.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 518 :   29360128 : layers.57.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 519 :   29360128 : layers.57.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 520 :   29360128 : layers.57.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 521 :       8192 : layers.57.attention_norm.weight     : [8192]          : torch.bfloat16
 522 :       8192 : layers.57.ffn_norm.weight           : [8192]          : torch.bfloat16
 523 :    8388608 : layers.58.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 524 :    1048576 : layers.58.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 525 :    1048576 : layers.58.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 526 :    8388608 : layers.58.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 527 :   29360128 : layers.58.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 528 :   29360128 : layers.58.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 529 :   29360128 : layers.58.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 530 :       8192 : layers.58.attention_norm.weight     : [8192]          : torch.bfloat16
 531 :       8192 : layers.58.ffn_norm.weight           : [8192]          : torch.bfloat16
 532 :    8388608 : layers.59.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 533 :    1048576 : layers.59.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 534 :    1048576 : layers.59.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 535 :    8388608 : layers.59.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 536 :   29360128 : layers.59.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 537 :   29360128 : layers.59.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 538 :   29360128 : layers.59.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 539 :       8192 : layers.59.attention_norm.weight     : [8192]          : torch.bfloat16
 540 :       8192 : layers.59.ffn_norm.weight           : [8192]          : torch.bfloat16
 541 :    8388608 : layers.60.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 542 :    1048576 : layers.60.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 543 :    1048576 : layers.60.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 544 :    8388608 : layers.60.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 545 :   29360128 : layers.60.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 546 :   29360128 : layers.60.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 547 :   29360128 : layers.60.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 548 :       8192 : layers.60.attention_norm.weight     : [8192]          : torch.bfloat16
 549 :       8192 : layers.60.ffn_norm.weight           : [8192]          : torch.bfloat16
 550 :    8388608 : layers.61.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 551 :    1048576 : layers.61.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 552 :    1048576 : layers.61.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 553 :    8388608 : layers.61.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 554 :   29360128 : layers.61.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 555 :   29360128 : layers.61.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 556 :   29360128 : layers.61.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 557 :       8192 : layers.61.attention_norm.weight     : [8192]          : torch.bfloat16
 558 :       8192 : layers.61.ffn_norm.weight           : [8192]          : torch.bfloat16
 559 :    8388608 : layers.62.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 560 :    1048576 : layers.62.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 561 :    1048576 : layers.62.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 562 :    8388608 : layers.62.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 563 :   29360128 : layers.62.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 564 :   29360128 : layers.62.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 565 :   29360128 : layers.62.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 566 :       8192 : layers.62.attention_norm.weight     : [8192]          : torch.bfloat16
 567 :       8192 : layers.62.ffn_norm.weight           : [8192]          : torch.bfloat16
 568 :    8388608 : layers.63.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 569 :    1048576 : layers.63.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 570 :    1048576 : layers.63.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 571 :    8388608 : layers.63.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 572 :   29360128 : layers.63.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 573 :   29360128 : layers.63.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 574 :   29360128 : layers.63.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 575 :       8192 : layers.63.attention_norm.weight     : [8192]          : torch.bfloat16
 576 :       8192 : layers.63.ffn_norm.weight           : [8192]          : torch.bfloat16
 577 :    8388608 : layers.64.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 578 :    1048576 : layers.64.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 579 :    1048576 : layers.64.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 580 :    8388608 : layers.64.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 581 :   29360128 : layers.64.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 582 :   29360128 : layers.64.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 583 :   29360128 : layers.64.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 584 :       8192 : layers.64.attention_norm.weight     : [8192]          : torch.bfloat16
 585 :       8192 : layers.64.ffn_norm.weight           : [8192]          : torch.bfloat16
 586 :    8388608 : layers.65.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 587 :    1048576 : layers.65.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 588 :    1048576 : layers.65.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 589 :    8388608 : layers.65.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 590 :   29360128 : layers.65.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 591 :   29360128 : layers.65.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 592 :   29360128 : layers.65.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 593 :       8192 : layers.65.attention_norm.weight     : [8192]          : torch.bfloat16
 594 :       8192 : layers.65.ffn_norm.weight           : [8192]          : torch.bfloat16
 595 :    8388608 : layers.66.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 596 :    1048576 : layers.66.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 597 :    1048576 : layers.66.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 598 :    8388608 : layers.66.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 599 :   29360128 : layers.66.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 600 :   29360128 : layers.66.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 601 :   29360128 : layers.66.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 602 :       8192 : layers.66.attention_norm.weight     : [8192]          : torch.bfloat16
 603 :       8192 : layers.66.ffn_norm.weight           : [8192]          : torch.bfloat16
 604 :    8388608 : layers.67.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 605 :    1048576 : layers.67.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 606 :    1048576 : layers.67.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 607 :    8388608 : layers.67.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 608 :   29360128 : layers.67.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 609 :   29360128 : layers.67.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 610 :   29360128 : layers.67.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 611 :       8192 : layers.67.attention_norm.weight     : [8192]          : torch.bfloat16
 612 :       8192 : layers.67.ffn_norm.weight           : [8192]          : torch.bfloat16
 613 :    8388608 : layers.68.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 614 :    1048576 : layers.68.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 615 :    1048576 : layers.68.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 616 :    8388608 : layers.68.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 617 :   29360128 : layers.68.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 618 :   29360128 : layers.68.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 619 :   29360128 : layers.68.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 620 :       8192 : layers.68.attention_norm.weight     : [8192]          : torch.bfloat16
 621 :       8192 : layers.68.ffn_norm.weight           : [8192]          : torch.bfloat16
 622 :    8388608 : layers.69.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 623 :    1048576 : layers.69.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 624 :    1048576 : layers.69.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 625 :    8388608 : layers.69.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 626 :   29360128 : layers.69.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 627 :   29360128 : layers.69.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 628 :   29360128 : layers.69.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 629 :       8192 : layers.69.attention_norm.weight     : [8192]          : torch.bfloat16
 630 :       8192 : layers.69.ffn_norm.weight           : [8192]          : torch.bfloat16
 631 :    8388608 : layers.70.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 632 :    1048576 : layers.70.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 633 :    1048576 : layers.70.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 634 :    8388608 : layers.70.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 635 :   29360128 : layers.70.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 636 :   29360128 : layers.70.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 637 :   29360128 : layers.70.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 638 :       8192 : layers.70.attention_norm.weight     : [8192]          : torch.bfloat16
 639 :       8192 : layers.70.ffn_norm.weight           : [8192]          : torch.bfloat16
 640 :    8388608 : layers.71.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 641 :    1048576 : layers.71.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 642 :    1048576 : layers.71.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 643 :    8388608 : layers.71.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 644 :   29360128 : layers.71.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 645 :   29360128 : layers.71.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 646 :   29360128 : layers.71.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 647 :       8192 : layers.71.attention_norm.weight     : [8192]          : torch.bfloat16
 648 :       8192 : layers.71.ffn_norm.weight           : [8192]          : torch.bfloat16
 649 :    8388608 : layers.72.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 650 :    1048576 : layers.72.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 651 :    1048576 : layers.72.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 652 :    8388608 : layers.72.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 653 :   29360128 : layers.72.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 654 :   29360128 : layers.72.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 655 :   29360128 : layers.72.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 656 :       8192 : layers.72.attention_norm.weight     : [8192]          : torch.bfloat16
 657 :       8192 : layers.72.ffn_norm.weight           : [8192]          : torch.bfloat16
 658 :    8388608 : layers.73.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 659 :    1048576 : layers.73.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 660 :    1048576 : layers.73.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 661 :    8388608 : layers.73.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 662 :   29360128 : layers.73.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 663 :   29360128 : layers.73.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 664 :   29360128 : layers.73.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 665 :       8192 : layers.73.attention_norm.weight     : [8192]          : torch.bfloat16
 666 :       8192 : layers.73.ffn_norm.weight           : [8192]          : torch.bfloat16
 667 :    8388608 : layers.74.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 668 :    1048576 : layers.74.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 669 :    1048576 : layers.74.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 670 :    8388608 : layers.74.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 671 :   29360128 : layers.74.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 672 :   29360128 : layers.74.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 673 :   29360128 : layers.74.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 674 :       8192 : layers.74.attention_norm.weight     : [8192]          : torch.bfloat16
 675 :       8192 : layers.74.ffn_norm.weight           : [8192]          : torch.bfloat16
 676 :    8388608 : layers.75.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 677 :    1048576 : layers.75.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 678 :    1048576 : layers.75.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 679 :    8388608 : layers.75.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 680 :   29360128 : layers.75.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 681 :   29360128 : layers.75.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 682 :   29360128 : layers.75.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 683 :       8192 : layers.75.attention_norm.weight     : [8192]          : torch.bfloat16
 684 :       8192 : layers.75.ffn_norm.weight           : [8192]          : torch.bfloat16
 685 :    8388608 : layers.76.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 686 :    1048576 : layers.76.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 687 :    1048576 : layers.76.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 688 :    8388608 : layers.76.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 689 :   29360128 : layers.76.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 690 :   29360128 : layers.76.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 691 :   29360128 : layers.76.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 692 :       8192 : layers.76.attention_norm.weight     : [8192]          : torch.bfloat16
 693 :       8192 : layers.76.ffn_norm.weight           : [8192]          : torch.bfloat16
 694 :    8388608 : layers.77.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 695 :    1048576 : layers.77.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 696 :    1048576 : layers.77.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 697 :    8388608 : layers.77.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 698 :   29360128 : layers.77.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 699 :   29360128 : layers.77.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 700 :   29360128 : layers.77.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 701 :       8192 : layers.77.attention_norm.weight     : [8192]          : torch.bfloat16
 702 :       8192 : layers.77.ffn_norm.weight           : [8192]          : torch.bfloat16
 703 :    8388608 : layers.78.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 704 :    1048576 : layers.78.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 705 :    1048576 : layers.78.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 706 :    8388608 : layers.78.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 707 :   29360128 : layers.78.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 708 :   29360128 : layers.78.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 709 :   29360128 : layers.78.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 710 :       8192 : layers.78.attention_norm.weight     : [8192]          : torch.bfloat16
 711 :       8192 : layers.78.ffn_norm.weight           : [8192]          : torch.bfloat16
 712 :    8388608 : layers.79.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 713 :    1048576 : layers.79.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 714 :    1048576 : layers.79.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 715 :    8388608 : layers.79.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 716 :   29360128 : layers.79.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 717 :   29360128 : layers.79.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 718 :   29360128 : layers.79.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 719 :       8192 : layers.79.attention_norm.weight     : [8192]          : torch.bfloat16
 720 :       8192 : layers.79.ffn_norm.weight           : [8192]          : torch.bfloat16
 721 :       8192 : norm.weight                         : [8192]          : torch.bfloat16
 722 :  131334144 : output.weight                       : [16032, 8192]   : torch.bfloat16

-----------------------------------------------------------------------------
8820367360 params in total.
17640734720 bytes in total.
5.89 sec, 5.89 sec, 2858.18 MB/s
-----------------------------------------------------------------------------

[2/8]: Loading original/consolidated.01.pth
   0 :  131334144 : tok_embeddings.weight               : [16032, 8192]   : torch.bfloat16
   1 :    8388608 : layers.0.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
   2 :    1048576 : layers.0.attention.wk.weight        : [128, 8192]     : torch.bfloat16
   3 :    1048576 : layers.0.attention.wv.weight        : [128, 8192]     : torch.bfloat16
   4 :    8388608 : layers.0.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
   5 :   29360128 : layers.0.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
   6 :   29360128 : layers.0.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
   7 :   29360128 : layers.0.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
   8 :       8192 : layers.0.attention_norm.weight      : [8192]          : torch.bfloat16
   9 :       8192 : layers.0.ffn_norm.weight            : [8192]          : torch.bfloat16
  10 :    8388608 : layers.1.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  11 :    1048576 : layers.1.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  12 :    1048576 : layers.1.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  13 :    8388608 : layers.1.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  14 :   29360128 : layers.1.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  15 :   29360128 : layers.1.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  16 :   29360128 : layers.1.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  17 :       8192 : layers.1.attention_norm.weight      : [8192]          : torch.bfloat16
  18 :       8192 : layers.1.ffn_norm.weight            : [8192]          : torch.bfloat16
  19 :    8388608 : layers.2.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  20 :    1048576 : layers.2.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  21 :    1048576 : layers.2.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  22 :    8388608 : layers.2.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  23 :   29360128 : layers.2.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  24 :   29360128 : layers.2.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  25 :   29360128 : layers.2.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  26 :       8192 : layers.2.attention_norm.weight      : [8192]          : torch.bfloat16
  27 :       8192 : layers.2.ffn_norm.weight            : [8192]          : torch.bfloat16
  28 :    8388608 : layers.3.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  29 :    1048576 : layers.3.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  30 :    1048576 : layers.3.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  31 :    8388608 : layers.3.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  32 :   29360128 : layers.3.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  33 :   29360128 : layers.3.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  34 :   29360128 : layers.3.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  35 :       8192 : layers.3.attention_norm.weight      : [8192]          : torch.bfloat16
  36 :       8192 : layers.3.ffn_norm.weight            : [8192]          : torch.bfloat16
  37 :    8388608 : layers.4.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  38 :    1048576 : layers.4.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  39 :    1048576 : layers.4.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  40 :    8388608 : layers.4.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  41 :   29360128 : layers.4.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  42 :   29360128 : layers.4.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  43 :   29360128 : layers.4.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  44 :       8192 : layers.4.attention_norm.weight      : [8192]          : torch.bfloat16
  45 :       8192 : layers.4.ffn_norm.weight            : [8192]          : torch.bfloat16
  46 :    8388608 : layers.5.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  47 :    1048576 : layers.5.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  48 :    1048576 : layers.5.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  49 :    8388608 : layers.5.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  50 :   29360128 : layers.5.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  51 :   29360128 : layers.5.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  52 :   29360128 : layers.5.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  53 :       8192 : layers.5.attention_norm.weight      : [8192]          : torch.bfloat16
  54 :       8192 : layers.5.ffn_norm.weight            : [8192]          : torch.bfloat16
  55 :    8388608 : layers.6.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  56 :    1048576 : layers.6.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  57 :    1048576 : layers.6.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  58 :    8388608 : layers.6.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  59 :   29360128 : layers.6.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  60 :   29360128 : layers.6.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  61 :   29360128 : layers.6.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  62 :       8192 : layers.6.attention_norm.weight      : [8192]          : torch.bfloat16
  63 :       8192 : layers.6.ffn_norm.weight            : [8192]          : torch.bfloat16
  64 :    8388608 : layers.7.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  65 :    1048576 : layers.7.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  66 :    1048576 : layers.7.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  67 :    8388608 : layers.7.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  68 :   29360128 : layers.7.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  69 :   29360128 : layers.7.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  70 :   29360128 : layers.7.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  71 :       8192 : layers.7.attention_norm.weight      : [8192]          : torch.bfloat16
  72 :       8192 : layers.7.ffn_norm.weight            : [8192]          : torch.bfloat16
  73 :    8388608 : layers.8.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  74 :    1048576 : layers.8.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  75 :    1048576 : layers.8.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  76 :    8388608 : layers.8.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  77 :   29360128 : layers.8.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  78 :   29360128 : layers.8.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  79 :   29360128 : layers.8.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  80 :       8192 : layers.8.attention_norm.weight      : [8192]          : torch.bfloat16
  81 :       8192 : layers.8.ffn_norm.weight            : [8192]          : torch.bfloat16
  82 :    8388608 : layers.9.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  83 :    1048576 : layers.9.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  84 :    1048576 : layers.9.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  85 :    8388608 : layers.9.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  86 :   29360128 : layers.9.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  87 :   29360128 : layers.9.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  88 :   29360128 : layers.9.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  89 :       8192 : layers.9.attention_norm.weight      : [8192]          : torch.bfloat16
  90 :       8192 : layers.9.ffn_norm.weight            : [8192]          : torch.bfloat16
  91 :    8388608 : layers.10.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
  92 :    1048576 : layers.10.attention.wk.weight       : [128, 8192]     : torch.bfloat16
  93 :    1048576 : layers.10.attention.wv.weight       : [128, 8192]     : torch.bfloat16
  94 :    8388608 : layers.10.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
  95 :   29360128 : layers.10.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
  96 :   29360128 : layers.10.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
  97 :   29360128 : layers.10.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
  98 :       8192 : layers.10.attention_norm.weight     : [8192]          : torch.bfloat16
  99 :       8192 : layers.10.ffn_norm.weight           : [8192]          : torch.bfloat16
 100 :    8388608 : layers.11.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 101 :    1048576 : layers.11.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 102 :    1048576 : layers.11.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 103 :    8388608 : layers.11.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 104 :   29360128 : layers.11.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 105 :   29360128 : layers.11.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 106 :   29360128 : layers.11.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 107 :       8192 : layers.11.attention_norm.weight     : [8192]          : torch.bfloat16
 108 :       8192 : layers.11.ffn_norm.weight           : [8192]          : torch.bfloat16
 109 :    8388608 : layers.12.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 110 :    1048576 : layers.12.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 111 :    1048576 : layers.12.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 112 :    8388608 : layers.12.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 113 :   29360128 : layers.12.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 114 :   29360128 : layers.12.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 115 :   29360128 : layers.12.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 116 :       8192 : layers.12.attention_norm.weight     : [8192]          : torch.bfloat16
 117 :       8192 : layers.12.ffn_norm.weight           : [8192]          : torch.bfloat16
 118 :    8388608 : layers.13.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 119 :    1048576 : layers.13.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 120 :    1048576 : layers.13.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 121 :    8388608 : layers.13.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 122 :   29360128 : layers.13.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 123 :   29360128 : layers.13.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 124 :   29360128 : layers.13.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 125 :       8192 : layers.13.attention_norm.weight     : [8192]          : torch.bfloat16
 126 :       8192 : layers.13.ffn_norm.weight           : [8192]          : torch.bfloat16
 127 :    8388608 : layers.14.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 128 :    1048576 : layers.14.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 129 :    1048576 : layers.14.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 130 :    8388608 : layers.14.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 131 :   29360128 : layers.14.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 132 :   29360128 : layers.14.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 133 :   29360128 : layers.14.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 134 :       8192 : layers.14.attention_norm.weight     : [8192]          : torch.bfloat16
 135 :       8192 : layers.14.ffn_norm.weight           : [8192]          : torch.bfloat16
 136 :    8388608 : layers.15.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 137 :    1048576 : layers.15.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 138 :    1048576 : layers.15.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 139 :    8388608 : layers.15.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 140 :   29360128 : layers.15.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 141 :   29360128 : layers.15.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 142 :   29360128 : layers.15.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 143 :       8192 : layers.15.attention_norm.weight     : [8192]          : torch.bfloat16
 144 :       8192 : layers.15.ffn_norm.weight           : [8192]          : torch.bfloat16
 145 :    8388608 : layers.16.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 146 :    1048576 : layers.16.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 147 :    1048576 : layers.16.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 148 :    8388608 : layers.16.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 149 :   29360128 : layers.16.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 150 :   29360128 : layers.16.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 151 :   29360128 : layers.16.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 152 :       8192 : layers.16.attention_norm.weight     : [8192]          : torch.bfloat16
 153 :       8192 : layers.16.ffn_norm.weight           : [8192]          : torch.bfloat16
 154 :    8388608 : layers.17.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 155 :    1048576 : layers.17.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 156 :    1048576 : layers.17.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 157 :    8388608 : layers.17.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 158 :   29360128 : layers.17.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 159 :   29360128 : layers.17.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 160 :   29360128 : layers.17.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 161 :       8192 : layers.17.attention_norm.weight     : [8192]          : torch.bfloat16
 162 :       8192 : layers.17.ffn_norm.weight           : [8192]          : torch.bfloat16
 163 :    8388608 : layers.18.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 164 :    1048576 : layers.18.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 165 :    1048576 : layers.18.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 166 :    8388608 : layers.18.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 167 :   29360128 : layers.18.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 168 :   29360128 : layers.18.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 169 :   29360128 : layers.18.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 170 :       8192 : layers.18.attention_norm.weight     : [8192]          : torch.bfloat16
 171 :       8192 : layers.18.ffn_norm.weight           : [8192]          : torch.bfloat16
 172 :    8388608 : layers.19.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 173 :    1048576 : layers.19.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 174 :    1048576 : layers.19.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 175 :    8388608 : layers.19.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 176 :   29360128 : layers.19.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 177 :   29360128 : layers.19.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 178 :   29360128 : layers.19.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 179 :       8192 : layers.19.attention_norm.weight     : [8192]          : torch.bfloat16
 180 :       8192 : layers.19.ffn_norm.weight           : [8192]          : torch.bfloat16
 181 :    8388608 : layers.20.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 182 :    1048576 : layers.20.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 183 :    1048576 : layers.20.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 184 :    8388608 : layers.20.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 185 :   29360128 : layers.20.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 186 :   29360128 : layers.20.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 187 :   29360128 : layers.20.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 188 :       8192 : layers.20.attention_norm.weight     : [8192]          : torch.bfloat16
 189 :       8192 : layers.20.ffn_norm.weight           : [8192]          : torch.bfloat16
 190 :    8388608 : layers.21.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 191 :    1048576 : layers.21.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 192 :    1048576 : layers.21.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 193 :    8388608 : layers.21.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 194 :   29360128 : layers.21.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 195 :   29360128 : layers.21.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 196 :   29360128 : layers.21.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 197 :       8192 : layers.21.attention_norm.weight     : [8192]          : torch.bfloat16
 198 :       8192 : layers.21.ffn_norm.weight           : [8192]          : torch.bfloat16
 199 :    8388608 : layers.22.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 200 :    1048576 : layers.22.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 201 :    1048576 : layers.22.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 202 :    8388608 : layers.22.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 203 :   29360128 : layers.22.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 204 :   29360128 : layers.22.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 205 :   29360128 : layers.22.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 206 :       8192 : layers.22.attention_norm.weight     : [8192]          : torch.bfloat16
 207 :       8192 : layers.22.ffn_norm.weight           : [8192]          : torch.bfloat16
 208 :    8388608 : layers.23.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 209 :    1048576 : layers.23.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 210 :    1048576 : layers.23.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 211 :    8388608 : layers.23.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 212 :   29360128 : layers.23.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 213 :   29360128 : layers.23.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 214 :   29360128 : layers.23.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 215 :       8192 : layers.23.attention_norm.weight     : [8192]          : torch.bfloat16
 216 :       8192 : layers.23.ffn_norm.weight           : [8192]          : torch.bfloat16
 217 :    8388608 : layers.24.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 218 :    1048576 : layers.24.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 219 :    1048576 : layers.24.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 220 :    8388608 : layers.24.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 221 :   29360128 : layers.24.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 222 :   29360128 : layers.24.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 223 :   29360128 : layers.24.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 224 :       8192 : layers.24.attention_norm.weight     : [8192]          : torch.bfloat16
 225 :       8192 : layers.24.ffn_norm.weight           : [8192]          : torch.bfloat16
 226 :    8388608 : layers.25.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 227 :    1048576 : layers.25.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 228 :    1048576 : layers.25.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 229 :    8388608 : layers.25.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 230 :   29360128 : layers.25.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 231 :   29360128 : layers.25.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 232 :   29360128 : layers.25.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 233 :       8192 : layers.25.attention_norm.weight     : [8192]          : torch.bfloat16
 234 :       8192 : layers.25.ffn_norm.weight           : [8192]          : torch.bfloat16
 235 :    8388608 : layers.26.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 236 :    1048576 : layers.26.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 237 :    1048576 : layers.26.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 238 :    8388608 : layers.26.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 239 :   29360128 : layers.26.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 240 :   29360128 : layers.26.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 241 :   29360128 : layers.26.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 242 :       8192 : layers.26.attention_norm.weight     : [8192]          : torch.bfloat16
 243 :       8192 : layers.26.ffn_norm.weight           : [8192]          : torch.bfloat16
 244 :    8388608 : layers.27.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 245 :    1048576 : layers.27.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 246 :    1048576 : layers.27.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 247 :    8388608 : layers.27.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 248 :   29360128 : layers.27.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 249 :   29360128 : layers.27.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 250 :   29360128 : layers.27.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 251 :       8192 : layers.27.attention_norm.weight     : [8192]          : torch.bfloat16
 252 :       8192 : layers.27.ffn_norm.weight           : [8192]          : torch.bfloat16
 253 :    8388608 : layers.28.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 254 :    1048576 : layers.28.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 255 :    1048576 : layers.28.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 256 :    8388608 : layers.28.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 257 :   29360128 : layers.28.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 258 :   29360128 : layers.28.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 259 :   29360128 : layers.28.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 260 :       8192 : layers.28.attention_norm.weight     : [8192]          : torch.bfloat16
 261 :       8192 : layers.28.ffn_norm.weight           : [8192]          : torch.bfloat16
 262 :    8388608 : layers.29.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 263 :    1048576 : layers.29.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 264 :    1048576 : layers.29.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 265 :    8388608 : layers.29.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 266 :   29360128 : layers.29.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 267 :   29360128 : layers.29.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 268 :   29360128 : layers.29.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 269 :       8192 : layers.29.attention_norm.weight     : [8192]          : torch.bfloat16
 270 :       8192 : layers.29.ffn_norm.weight           : [8192]          : torch.bfloat16
 271 :    8388608 : layers.30.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 272 :    1048576 : layers.30.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 273 :    1048576 : layers.30.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 274 :    8388608 : layers.30.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 275 :   29360128 : layers.30.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 276 :   29360128 : layers.30.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 277 :   29360128 : layers.30.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 278 :       8192 : layers.30.attention_norm.weight     : [8192]          : torch.bfloat16
 279 :       8192 : layers.30.ffn_norm.weight           : [8192]          : torch.bfloat16
 280 :    8388608 : layers.31.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 281 :    1048576 : layers.31.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 282 :    1048576 : layers.31.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 283 :    8388608 : layers.31.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 284 :   29360128 : layers.31.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 285 :   29360128 : layers.31.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 286 :   29360128 : layers.31.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 287 :       8192 : layers.31.attention_norm.weight     : [8192]          : torch.bfloat16
 288 :       8192 : layers.31.ffn_norm.weight           : [8192]          : torch.bfloat16
 289 :    8388608 : layers.32.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 290 :    1048576 : layers.32.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 291 :    1048576 : layers.32.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 292 :    8388608 : layers.32.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 293 :   29360128 : layers.32.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 294 :   29360128 : layers.32.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 295 :   29360128 : layers.32.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 296 :       8192 : layers.32.attention_norm.weight     : [8192]          : torch.bfloat16
 297 :       8192 : layers.32.ffn_norm.weight           : [8192]          : torch.bfloat16
 298 :    8388608 : layers.33.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 299 :    1048576 : layers.33.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 300 :    1048576 : layers.33.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 301 :    8388608 : layers.33.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 302 :   29360128 : layers.33.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 303 :   29360128 : layers.33.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 304 :   29360128 : layers.33.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 305 :       8192 : layers.33.attention_norm.weight     : [8192]          : torch.bfloat16
 306 :       8192 : layers.33.ffn_norm.weight           : [8192]          : torch.bfloat16
 307 :    8388608 : layers.34.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 308 :    1048576 : layers.34.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 309 :    1048576 : layers.34.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 310 :    8388608 : layers.34.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 311 :   29360128 : layers.34.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 312 :   29360128 : layers.34.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 313 :   29360128 : layers.34.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 314 :       8192 : layers.34.attention_norm.weight     : [8192]          : torch.bfloat16
 315 :       8192 : layers.34.ffn_norm.weight           : [8192]          : torch.bfloat16
 316 :    8388608 : layers.35.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 317 :    1048576 : layers.35.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 318 :    1048576 : layers.35.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 319 :    8388608 : layers.35.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 320 :   29360128 : layers.35.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 321 :   29360128 : layers.35.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 322 :   29360128 : layers.35.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 323 :       8192 : layers.35.attention_norm.weight     : [8192]          : torch.bfloat16
 324 :       8192 : layers.35.ffn_norm.weight           : [8192]          : torch.bfloat16
 325 :    8388608 : layers.36.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 326 :    1048576 : layers.36.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 327 :    1048576 : layers.36.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 328 :    8388608 : layers.36.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 329 :   29360128 : layers.36.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 330 :   29360128 : layers.36.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 331 :   29360128 : layers.36.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 332 :       8192 : layers.36.attention_norm.weight     : [8192]          : torch.bfloat16
 333 :       8192 : layers.36.ffn_norm.weight           : [8192]          : torch.bfloat16
 334 :    8388608 : layers.37.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 335 :    1048576 : layers.37.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 336 :    1048576 : layers.37.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 337 :    8388608 : layers.37.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 338 :   29360128 : layers.37.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 339 :   29360128 : layers.37.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 340 :   29360128 : layers.37.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 341 :       8192 : layers.37.attention_norm.weight     : [8192]          : torch.bfloat16
 342 :       8192 : layers.37.ffn_norm.weight           : [8192]          : torch.bfloat16
 343 :    8388608 : layers.38.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 344 :    1048576 : layers.38.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 345 :    1048576 : layers.38.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 346 :    8388608 : layers.38.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 347 :   29360128 : layers.38.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 348 :   29360128 : layers.38.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 349 :   29360128 : layers.38.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 350 :       8192 : layers.38.attention_norm.weight     : [8192]          : torch.bfloat16
 351 :       8192 : layers.38.ffn_norm.weight           : [8192]          : torch.bfloat16
 352 :    8388608 : layers.39.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 353 :    1048576 : layers.39.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 354 :    1048576 : layers.39.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 355 :    8388608 : layers.39.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 356 :   29360128 : layers.39.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 357 :   29360128 : layers.39.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 358 :   29360128 : layers.39.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 359 :       8192 : layers.39.attention_norm.weight     : [8192]          : torch.bfloat16
 360 :       8192 : layers.39.ffn_norm.weight           : [8192]          : torch.bfloat16
 361 :    8388608 : layers.40.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 362 :    1048576 : layers.40.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 363 :    1048576 : layers.40.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 364 :    8388608 : layers.40.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 365 :   29360128 : layers.40.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 366 :   29360128 : layers.40.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 367 :   29360128 : layers.40.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 368 :       8192 : layers.40.attention_norm.weight     : [8192]          : torch.bfloat16
 369 :       8192 : layers.40.ffn_norm.weight           : [8192]          : torch.bfloat16
 370 :    8388608 : layers.41.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 371 :    1048576 : layers.41.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 372 :    1048576 : layers.41.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 373 :    8388608 : layers.41.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 374 :   29360128 : layers.41.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 375 :   29360128 : layers.41.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 376 :   29360128 : layers.41.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 377 :       8192 : layers.41.attention_norm.weight     : [8192]          : torch.bfloat16
 378 :       8192 : layers.41.ffn_norm.weight           : [8192]          : torch.bfloat16
 379 :    8388608 : layers.42.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 380 :    1048576 : layers.42.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 381 :    1048576 : layers.42.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 382 :    8388608 : layers.42.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 383 :   29360128 : layers.42.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 384 :   29360128 : layers.42.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 385 :   29360128 : layers.42.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 386 :       8192 : layers.42.attention_norm.weight     : [8192]          : torch.bfloat16
 387 :       8192 : layers.42.ffn_norm.weight           : [8192]          : torch.bfloat16
 388 :    8388608 : layers.43.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 389 :    1048576 : layers.43.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 390 :    1048576 : layers.43.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 391 :    8388608 : layers.43.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 392 :   29360128 : layers.43.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 393 :   29360128 : layers.43.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 394 :   29360128 : layers.43.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 395 :       8192 : layers.43.attention_norm.weight     : [8192]          : torch.bfloat16
 396 :       8192 : layers.43.ffn_norm.weight           : [8192]          : torch.bfloat16
 397 :    8388608 : layers.44.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 398 :    1048576 : layers.44.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 399 :    1048576 : layers.44.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 400 :    8388608 : layers.44.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 401 :   29360128 : layers.44.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 402 :   29360128 : layers.44.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 403 :   29360128 : layers.44.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 404 :       8192 : layers.44.attention_norm.weight     : [8192]          : torch.bfloat16
 405 :       8192 : layers.44.ffn_norm.weight           : [8192]          : torch.bfloat16
 406 :    8388608 : layers.45.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 407 :    1048576 : layers.45.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 408 :    1048576 : layers.45.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 409 :    8388608 : layers.45.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 410 :   29360128 : layers.45.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 411 :   29360128 : layers.45.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 412 :   29360128 : layers.45.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 413 :       8192 : layers.45.attention_norm.weight     : [8192]          : torch.bfloat16
 414 :       8192 : layers.45.ffn_norm.weight           : [8192]          : torch.bfloat16
 415 :    8388608 : layers.46.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 416 :    1048576 : layers.46.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 417 :    1048576 : layers.46.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 418 :    8388608 : layers.46.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 419 :   29360128 : layers.46.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 420 :   29360128 : layers.46.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 421 :   29360128 : layers.46.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 422 :       8192 : layers.46.attention_norm.weight     : [8192]          : torch.bfloat16
 423 :       8192 : layers.46.ffn_norm.weight           : [8192]          : torch.bfloat16
 424 :    8388608 : layers.47.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 425 :    1048576 : layers.47.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 426 :    1048576 : layers.47.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 427 :    8388608 : layers.47.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 428 :   29360128 : layers.47.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 429 :   29360128 : layers.47.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 430 :   29360128 : layers.47.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 431 :       8192 : layers.47.attention_norm.weight     : [8192]          : torch.bfloat16
 432 :       8192 : layers.47.ffn_norm.weight           : [8192]          : torch.bfloat16
 433 :    8388608 : layers.48.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 434 :    1048576 : layers.48.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 435 :    1048576 : layers.48.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 436 :    8388608 : layers.48.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 437 :   29360128 : layers.48.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 438 :   29360128 : layers.48.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 439 :   29360128 : layers.48.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 440 :       8192 : layers.48.attention_norm.weight     : [8192]          : torch.bfloat16
 441 :       8192 : layers.48.ffn_norm.weight           : [8192]          : torch.bfloat16
 442 :    8388608 : layers.49.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 443 :    1048576 : layers.49.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 444 :    1048576 : layers.49.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 445 :    8388608 : layers.49.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 446 :   29360128 : layers.49.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 447 :   29360128 : layers.49.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 448 :   29360128 : layers.49.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 449 :       8192 : layers.49.attention_norm.weight     : [8192]          : torch.bfloat16
 450 :       8192 : layers.49.ffn_norm.weight           : [8192]          : torch.bfloat16
 451 :    8388608 : layers.50.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 452 :    1048576 : layers.50.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 453 :    1048576 : layers.50.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 454 :    8388608 : layers.50.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 455 :   29360128 : layers.50.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 456 :   29360128 : layers.50.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 457 :   29360128 : layers.50.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 458 :       8192 : layers.50.attention_norm.weight     : [8192]          : torch.bfloat16
 459 :       8192 : layers.50.ffn_norm.weight           : [8192]          : torch.bfloat16
 460 :    8388608 : layers.51.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 461 :    1048576 : layers.51.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 462 :    1048576 : layers.51.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 463 :    8388608 : layers.51.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 464 :   29360128 : layers.51.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 465 :   29360128 : layers.51.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 466 :   29360128 : layers.51.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 467 :       8192 : layers.51.attention_norm.weight     : [8192]          : torch.bfloat16
 468 :       8192 : layers.51.ffn_norm.weight           : [8192]          : torch.bfloat16
 469 :    8388608 : layers.52.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 470 :    1048576 : layers.52.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 471 :    1048576 : layers.52.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 472 :    8388608 : layers.52.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 473 :   29360128 : layers.52.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 474 :   29360128 : layers.52.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 475 :   29360128 : layers.52.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 476 :       8192 : layers.52.attention_norm.weight     : [8192]          : torch.bfloat16
 477 :       8192 : layers.52.ffn_norm.weight           : [8192]          : torch.bfloat16
 478 :    8388608 : layers.53.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 479 :    1048576 : layers.53.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 480 :    1048576 : layers.53.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 481 :    8388608 : layers.53.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 482 :   29360128 : layers.53.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 483 :   29360128 : layers.53.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 484 :   29360128 : layers.53.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 485 :       8192 : layers.53.attention_norm.weight     : [8192]          : torch.bfloat16
 486 :       8192 : layers.53.ffn_norm.weight           : [8192]          : torch.bfloat16
 487 :    8388608 : layers.54.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 488 :    1048576 : layers.54.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 489 :    1048576 : layers.54.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 490 :    8388608 : layers.54.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 491 :   29360128 : layers.54.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 492 :   29360128 : layers.54.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 493 :   29360128 : layers.54.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 494 :       8192 : layers.54.attention_norm.weight     : [8192]          : torch.bfloat16
 495 :       8192 : layers.54.ffn_norm.weight           : [8192]          : torch.bfloat16
 496 :    8388608 : layers.55.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 497 :    1048576 : layers.55.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 498 :    1048576 : layers.55.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 499 :    8388608 : layers.55.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 500 :   29360128 : layers.55.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 501 :   29360128 : layers.55.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 502 :   29360128 : layers.55.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 503 :       8192 : layers.55.attention_norm.weight     : [8192]          : torch.bfloat16
 504 :       8192 : layers.55.ffn_norm.weight           : [8192]          : torch.bfloat16
 505 :    8388608 : layers.56.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 506 :    1048576 : layers.56.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 507 :    1048576 : layers.56.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 508 :    8388608 : layers.56.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 509 :   29360128 : layers.56.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 510 :   29360128 : layers.56.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 511 :   29360128 : layers.56.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 512 :       8192 : layers.56.attention_norm.weight     : [8192]          : torch.bfloat16
 513 :       8192 : layers.56.ffn_norm.weight           : [8192]          : torch.bfloat16
 514 :    8388608 : layers.57.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 515 :    1048576 : layers.57.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 516 :    1048576 : layers.57.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 517 :    8388608 : layers.57.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 518 :   29360128 : layers.57.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 519 :   29360128 : layers.57.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 520 :   29360128 : layers.57.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 521 :       8192 : layers.57.attention_norm.weight     : [8192]          : torch.bfloat16
 522 :       8192 : layers.57.ffn_norm.weight           : [8192]          : torch.bfloat16
 523 :    8388608 : layers.58.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 524 :    1048576 : layers.58.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 525 :    1048576 : layers.58.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 526 :    8388608 : layers.58.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 527 :   29360128 : layers.58.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 528 :   29360128 : layers.58.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 529 :   29360128 : layers.58.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 530 :       8192 : layers.58.attention_norm.weight     : [8192]          : torch.bfloat16
 531 :       8192 : layers.58.ffn_norm.weight           : [8192]          : torch.bfloat16
 532 :    8388608 : layers.59.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 533 :    1048576 : layers.59.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 534 :    1048576 : layers.59.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 535 :    8388608 : layers.59.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 536 :   29360128 : layers.59.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 537 :   29360128 : layers.59.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 538 :   29360128 : layers.59.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 539 :       8192 : layers.59.attention_norm.weight     : [8192]          : torch.bfloat16
 540 :       8192 : layers.59.ffn_norm.weight           : [8192]          : torch.bfloat16
 541 :    8388608 : layers.60.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 542 :    1048576 : layers.60.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 543 :    1048576 : layers.60.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 544 :    8388608 : layers.60.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 545 :   29360128 : layers.60.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 546 :   29360128 : layers.60.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 547 :   29360128 : layers.60.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 548 :       8192 : layers.60.attention_norm.weight     : [8192]          : torch.bfloat16
 549 :       8192 : layers.60.ffn_norm.weight           : [8192]          : torch.bfloat16
 550 :    8388608 : layers.61.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 551 :    1048576 : layers.61.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 552 :    1048576 : layers.61.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 553 :    8388608 : layers.61.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 554 :   29360128 : layers.61.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 555 :   29360128 : layers.61.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 556 :   29360128 : layers.61.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 557 :       8192 : layers.61.attention_norm.weight     : [8192]          : torch.bfloat16
 558 :       8192 : layers.61.ffn_norm.weight           : [8192]          : torch.bfloat16
 559 :    8388608 : layers.62.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 560 :    1048576 : layers.62.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 561 :    1048576 : layers.62.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 562 :    8388608 : layers.62.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 563 :   29360128 : layers.62.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 564 :   29360128 : layers.62.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 565 :   29360128 : layers.62.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 566 :       8192 : layers.62.attention_norm.weight     : [8192]          : torch.bfloat16
 567 :       8192 : layers.62.ffn_norm.weight           : [8192]          : torch.bfloat16
 568 :    8388608 : layers.63.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 569 :    1048576 : layers.63.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 570 :    1048576 : layers.63.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 571 :    8388608 : layers.63.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 572 :   29360128 : layers.63.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 573 :   29360128 : layers.63.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 574 :   29360128 : layers.63.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 575 :       8192 : layers.63.attention_norm.weight     : [8192]          : torch.bfloat16
 576 :       8192 : layers.63.ffn_norm.weight           : [8192]          : torch.bfloat16
 577 :    8388608 : layers.64.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 578 :    1048576 : layers.64.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 579 :    1048576 : layers.64.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 580 :    8388608 : layers.64.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 581 :   29360128 : layers.64.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 582 :   29360128 : layers.64.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 583 :   29360128 : layers.64.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 584 :       8192 : layers.64.attention_norm.weight     : [8192]          : torch.bfloat16
 585 :       8192 : layers.64.ffn_norm.weight           : [8192]          : torch.bfloat16
 586 :    8388608 : layers.65.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 587 :    1048576 : layers.65.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 588 :    1048576 : layers.65.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 589 :    8388608 : layers.65.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 590 :   29360128 : layers.65.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 591 :   29360128 : layers.65.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 592 :   29360128 : layers.65.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 593 :       8192 : layers.65.attention_norm.weight     : [8192]          : torch.bfloat16
 594 :       8192 : layers.65.ffn_norm.weight           : [8192]          : torch.bfloat16
 595 :    8388608 : layers.66.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 596 :    1048576 : layers.66.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 597 :    1048576 : layers.66.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 598 :    8388608 : layers.66.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 599 :   29360128 : layers.66.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 600 :   29360128 : layers.66.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 601 :   29360128 : layers.66.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 602 :       8192 : layers.66.attention_norm.weight     : [8192]          : torch.bfloat16
 603 :       8192 : layers.66.ffn_norm.weight           : [8192]          : torch.bfloat16
 604 :    8388608 : layers.67.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 605 :    1048576 : layers.67.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 606 :    1048576 : layers.67.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 607 :    8388608 : layers.67.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 608 :   29360128 : layers.67.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 609 :   29360128 : layers.67.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 610 :   29360128 : layers.67.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 611 :       8192 : layers.67.attention_norm.weight     : [8192]          : torch.bfloat16
 612 :       8192 : layers.67.ffn_norm.weight           : [8192]          : torch.bfloat16
 613 :    8388608 : layers.68.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 614 :    1048576 : layers.68.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 615 :    1048576 : layers.68.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 616 :    8388608 : layers.68.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 617 :   29360128 : layers.68.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 618 :   29360128 : layers.68.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 619 :   29360128 : layers.68.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 620 :       8192 : layers.68.attention_norm.weight     : [8192]          : torch.bfloat16
 621 :       8192 : layers.68.ffn_norm.weight           : [8192]          : torch.bfloat16
 622 :    8388608 : layers.69.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 623 :    1048576 : layers.69.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 624 :    1048576 : layers.69.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 625 :    8388608 : layers.69.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 626 :   29360128 : layers.69.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 627 :   29360128 : layers.69.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 628 :   29360128 : layers.69.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 629 :       8192 : layers.69.attention_norm.weight     : [8192]          : torch.bfloat16
 630 :       8192 : layers.69.ffn_norm.weight           : [8192]          : torch.bfloat16
 631 :    8388608 : layers.70.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 632 :    1048576 : layers.70.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 633 :    1048576 : layers.70.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 634 :    8388608 : layers.70.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 635 :   29360128 : layers.70.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 636 :   29360128 : layers.70.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 637 :   29360128 : layers.70.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 638 :       8192 : layers.70.attention_norm.weight     : [8192]          : torch.bfloat16
 639 :       8192 : layers.70.ffn_norm.weight           : [8192]          : torch.bfloat16
 640 :    8388608 : layers.71.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 641 :    1048576 : layers.71.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 642 :    1048576 : layers.71.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 643 :    8388608 : layers.71.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 644 :   29360128 : layers.71.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 645 :   29360128 : layers.71.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 646 :   29360128 : layers.71.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 647 :       8192 : layers.71.attention_norm.weight     : [8192]          : torch.bfloat16
 648 :       8192 : layers.71.ffn_norm.weight           : [8192]          : torch.bfloat16
 649 :    8388608 : layers.72.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 650 :    1048576 : layers.72.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 651 :    1048576 : layers.72.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 652 :    8388608 : layers.72.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 653 :   29360128 : layers.72.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 654 :   29360128 : layers.72.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 655 :   29360128 : layers.72.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 656 :       8192 : layers.72.attention_norm.weight     : [8192]          : torch.bfloat16
 657 :       8192 : layers.72.ffn_norm.weight           : [8192]          : torch.bfloat16
 658 :    8388608 : layers.73.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 659 :    1048576 : layers.73.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 660 :    1048576 : layers.73.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 661 :    8388608 : layers.73.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 662 :   29360128 : layers.73.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 663 :   29360128 : layers.73.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 664 :   29360128 : layers.73.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 665 :       8192 : layers.73.attention_norm.weight     : [8192]          : torch.bfloat16
 666 :       8192 : layers.73.ffn_norm.weight           : [8192]          : torch.bfloat16
 667 :    8388608 : layers.74.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 668 :    1048576 : layers.74.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 669 :    1048576 : layers.74.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 670 :    8388608 : layers.74.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 671 :   29360128 : layers.74.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 672 :   29360128 : layers.74.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 673 :   29360128 : layers.74.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 674 :       8192 : layers.74.attention_norm.weight     : [8192]          : torch.bfloat16
 675 :       8192 : layers.74.ffn_norm.weight           : [8192]          : torch.bfloat16
 676 :    8388608 : layers.75.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 677 :    1048576 : layers.75.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 678 :    1048576 : layers.75.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 679 :    8388608 : layers.75.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 680 :   29360128 : layers.75.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 681 :   29360128 : layers.75.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 682 :   29360128 : layers.75.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 683 :       8192 : layers.75.attention_norm.weight     : [8192]          : torch.bfloat16
 684 :       8192 : layers.75.ffn_norm.weight           : [8192]          : torch.bfloat16
 685 :    8388608 : layers.76.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 686 :    1048576 : layers.76.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 687 :    1048576 : layers.76.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 688 :    8388608 : layers.76.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 689 :   29360128 : layers.76.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 690 :   29360128 : layers.76.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 691 :   29360128 : layers.76.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 692 :       8192 : layers.76.attention_norm.weight     : [8192]          : torch.bfloat16
 693 :       8192 : layers.76.ffn_norm.weight           : [8192]          : torch.bfloat16
 694 :    8388608 : layers.77.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 695 :    1048576 : layers.77.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 696 :    1048576 : layers.77.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 697 :    8388608 : layers.77.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 698 :   29360128 : layers.77.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 699 :   29360128 : layers.77.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 700 :   29360128 : layers.77.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 701 :       8192 : layers.77.attention_norm.weight     : [8192]          : torch.bfloat16
 702 :       8192 : layers.77.ffn_norm.weight           : [8192]          : torch.bfloat16
 703 :    8388608 : layers.78.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 704 :    1048576 : layers.78.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 705 :    1048576 : layers.78.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 706 :    8388608 : layers.78.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 707 :   29360128 : layers.78.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 708 :   29360128 : layers.78.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 709 :   29360128 : layers.78.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 710 :       8192 : layers.78.attention_norm.weight     : [8192]          : torch.bfloat16
 711 :       8192 : layers.78.ffn_norm.weight           : [8192]          : torch.bfloat16
 712 :    8388608 : layers.79.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 713 :    1048576 : layers.79.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 714 :    1048576 : layers.79.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 715 :    8388608 : layers.79.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 716 :   29360128 : layers.79.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 717 :   29360128 : layers.79.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 718 :   29360128 : layers.79.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 719 :       8192 : layers.79.attention_norm.weight     : [8192]          : torch.bfloat16
 720 :       8192 : layers.79.ffn_norm.weight           : [8192]          : torch.bfloat16
 721 :       8192 : norm.weight                         : [8192]          : torch.bfloat16
 722 :  131334144 : output.weight                       : [16032, 8192]   : torch.bfloat16

-----------------------------------------------------------------------------
17640734720 params in total.
35281469440 bytes in total.
12.15 sec, 6.27 sec, 2768.56 MB/s
-----------------------------------------------------------------------------

[3/8]: Loading original/consolidated.02.pth
   0 :  131334144 : tok_embeddings.weight               : [16032, 8192]   : torch.bfloat16
   1 :    8388608 : layers.0.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
   2 :    1048576 : layers.0.attention.wk.weight        : [128, 8192]     : torch.bfloat16
   3 :    1048576 : layers.0.attention.wv.weight        : [128, 8192]     : torch.bfloat16
   4 :    8388608 : layers.0.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
   5 :   29360128 : layers.0.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
   6 :   29360128 : layers.0.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
   7 :   29360128 : layers.0.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
   8 :       8192 : layers.0.attention_norm.weight      : [8192]          : torch.bfloat16
   9 :       8192 : layers.0.ffn_norm.weight            : [8192]          : torch.bfloat16
  10 :    8388608 : layers.1.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  11 :    1048576 : layers.1.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  12 :    1048576 : layers.1.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  13 :    8388608 : layers.1.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  14 :   29360128 : layers.1.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  15 :   29360128 : layers.1.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  16 :   29360128 : layers.1.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  17 :       8192 : layers.1.attention_norm.weight      : [8192]          : torch.bfloat16
  18 :       8192 : layers.1.ffn_norm.weight            : [8192]          : torch.bfloat16
  19 :    8388608 : layers.2.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  20 :    1048576 : layers.2.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  21 :    1048576 : layers.2.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  22 :    8388608 : layers.2.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  23 :   29360128 : layers.2.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  24 :   29360128 : layers.2.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  25 :   29360128 : layers.2.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  26 :       8192 : layers.2.attention_norm.weight      : [8192]          : torch.bfloat16
  27 :       8192 : layers.2.ffn_norm.weight            : [8192]          : torch.bfloat16
  28 :    8388608 : layers.3.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  29 :    1048576 : layers.3.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  30 :    1048576 : layers.3.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  31 :    8388608 : layers.3.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  32 :   29360128 : layers.3.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  33 :   29360128 : layers.3.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  34 :   29360128 : layers.3.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  35 :       8192 : layers.3.attention_norm.weight      : [8192]          : torch.bfloat16
  36 :       8192 : layers.3.ffn_norm.weight            : [8192]          : torch.bfloat16
  37 :    8388608 : layers.4.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  38 :    1048576 : layers.4.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  39 :    1048576 : layers.4.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  40 :    8388608 : layers.4.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  41 :   29360128 : layers.4.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  42 :   29360128 : layers.4.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  43 :   29360128 : layers.4.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  44 :       8192 : layers.4.attention_norm.weight      : [8192]          : torch.bfloat16
  45 :       8192 : layers.4.ffn_norm.weight            : [8192]          : torch.bfloat16
  46 :    8388608 : layers.5.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  47 :    1048576 : layers.5.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  48 :    1048576 : layers.5.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  49 :    8388608 : layers.5.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  50 :   29360128 : layers.5.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  51 :   29360128 : layers.5.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  52 :   29360128 : layers.5.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  53 :       8192 : layers.5.attention_norm.weight      : [8192]          : torch.bfloat16
  54 :       8192 : layers.5.ffn_norm.weight            : [8192]          : torch.bfloat16
  55 :    8388608 : layers.6.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  56 :    1048576 : layers.6.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  57 :    1048576 : layers.6.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  58 :    8388608 : layers.6.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  59 :   29360128 : layers.6.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  60 :   29360128 : layers.6.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  61 :   29360128 : layers.6.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  62 :       8192 : layers.6.attention_norm.weight      : [8192]          : torch.bfloat16
  63 :       8192 : layers.6.ffn_norm.weight            : [8192]          : torch.bfloat16
  64 :    8388608 : layers.7.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  65 :    1048576 : layers.7.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  66 :    1048576 : layers.7.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  67 :    8388608 : layers.7.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  68 :   29360128 : layers.7.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  69 :   29360128 : layers.7.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  70 :   29360128 : layers.7.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  71 :       8192 : layers.7.attention_norm.weight      : [8192]          : torch.bfloat16
  72 :       8192 : layers.7.ffn_norm.weight            : [8192]          : torch.bfloat16
  73 :    8388608 : layers.8.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  74 :    1048576 : layers.8.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  75 :    1048576 : layers.8.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  76 :    8388608 : layers.8.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  77 :   29360128 : layers.8.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  78 :   29360128 : layers.8.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  79 :   29360128 : layers.8.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  80 :       8192 : layers.8.attention_norm.weight      : [8192]          : torch.bfloat16
  81 :       8192 : layers.8.ffn_norm.weight            : [8192]          : torch.bfloat16
  82 :    8388608 : layers.9.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  83 :    1048576 : layers.9.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  84 :    1048576 : layers.9.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  85 :    8388608 : layers.9.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  86 :   29360128 : layers.9.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  87 :   29360128 : layers.9.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  88 :   29360128 : layers.9.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  89 :       8192 : layers.9.attention_norm.weight      : [8192]          : torch.bfloat16
  90 :       8192 : layers.9.ffn_norm.weight            : [8192]          : torch.bfloat16
  91 :    8388608 : layers.10.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
  92 :    1048576 : layers.10.attention.wk.weight       : [128, 8192]     : torch.bfloat16
  93 :    1048576 : layers.10.attention.wv.weight       : [128, 8192]     : torch.bfloat16
  94 :    8388608 : layers.10.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
  95 :   29360128 : layers.10.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
  96 :   29360128 : layers.10.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
  97 :   29360128 : layers.10.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
  98 :       8192 : layers.10.attention_norm.weight     : [8192]          : torch.bfloat16
  99 :       8192 : layers.10.ffn_norm.weight           : [8192]          : torch.bfloat16
 100 :    8388608 : layers.11.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 101 :    1048576 : layers.11.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 102 :    1048576 : layers.11.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 103 :    8388608 : layers.11.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 104 :   29360128 : layers.11.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 105 :   29360128 : layers.11.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 106 :   29360128 : layers.11.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 107 :       8192 : layers.11.attention_norm.weight     : [8192]          : torch.bfloat16
 108 :       8192 : layers.11.ffn_norm.weight           : [8192]          : torch.bfloat16
 109 :    8388608 : layers.12.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 110 :    1048576 : layers.12.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 111 :    1048576 : layers.12.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 112 :    8388608 : layers.12.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 113 :   29360128 : layers.12.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 114 :   29360128 : layers.12.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 115 :   29360128 : layers.12.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 116 :       8192 : layers.12.attention_norm.weight     : [8192]          : torch.bfloat16
 117 :       8192 : layers.12.ffn_norm.weight           : [8192]          : torch.bfloat16
 118 :    8388608 : layers.13.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 119 :    1048576 : layers.13.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 120 :    1048576 : layers.13.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 121 :    8388608 : layers.13.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 122 :   29360128 : layers.13.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 123 :   29360128 : layers.13.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 124 :   29360128 : layers.13.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 125 :       8192 : layers.13.attention_norm.weight     : [8192]          : torch.bfloat16
 126 :       8192 : layers.13.ffn_norm.weight           : [8192]          : torch.bfloat16
 127 :    8388608 : layers.14.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 128 :    1048576 : layers.14.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 129 :    1048576 : layers.14.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 130 :    8388608 : layers.14.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 131 :   29360128 : layers.14.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 132 :   29360128 : layers.14.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 133 :   29360128 : layers.14.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 134 :       8192 : layers.14.attention_norm.weight     : [8192]          : torch.bfloat16
 135 :       8192 : layers.14.ffn_norm.weight           : [8192]          : torch.bfloat16
 136 :    8388608 : layers.15.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 137 :    1048576 : layers.15.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 138 :    1048576 : layers.15.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 139 :    8388608 : layers.15.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 140 :   29360128 : layers.15.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 141 :   29360128 : layers.15.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 142 :   29360128 : layers.15.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 143 :       8192 : layers.15.attention_norm.weight     : [8192]          : torch.bfloat16
 144 :       8192 : layers.15.ffn_norm.weight           : [8192]          : torch.bfloat16
 145 :    8388608 : layers.16.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 146 :    1048576 : layers.16.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 147 :    1048576 : layers.16.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 148 :    8388608 : layers.16.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 149 :   29360128 : layers.16.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 150 :   29360128 : layers.16.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 151 :   29360128 : layers.16.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 152 :       8192 : layers.16.attention_norm.weight     : [8192]          : torch.bfloat16
 153 :       8192 : layers.16.ffn_norm.weight           : [8192]          : torch.bfloat16
 154 :    8388608 : layers.17.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 155 :    1048576 : layers.17.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 156 :    1048576 : layers.17.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 157 :    8388608 : layers.17.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 158 :   29360128 : layers.17.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 159 :   29360128 : layers.17.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 160 :   29360128 : layers.17.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 161 :       8192 : layers.17.attention_norm.weight     : [8192]          : torch.bfloat16
 162 :       8192 : layers.17.ffn_norm.weight           : [8192]          : torch.bfloat16
 163 :    8388608 : layers.18.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 164 :    1048576 : layers.18.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 165 :    1048576 : layers.18.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 166 :    8388608 : layers.18.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 167 :   29360128 : layers.18.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 168 :   29360128 : layers.18.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 169 :   29360128 : layers.18.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 170 :       8192 : layers.18.attention_norm.weight     : [8192]          : torch.bfloat16
 171 :       8192 : layers.18.ffn_norm.weight           : [8192]          : torch.bfloat16
 172 :    8388608 : layers.19.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 173 :    1048576 : layers.19.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 174 :    1048576 : layers.19.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 175 :    8388608 : layers.19.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 176 :   29360128 : layers.19.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 177 :   29360128 : layers.19.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 178 :   29360128 : layers.19.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 179 :       8192 : layers.19.attention_norm.weight     : [8192]          : torch.bfloat16
 180 :       8192 : layers.19.ffn_norm.weight           : [8192]          : torch.bfloat16
 181 :    8388608 : layers.20.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 182 :    1048576 : layers.20.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 183 :    1048576 : layers.20.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 184 :    8388608 : layers.20.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 185 :   29360128 : layers.20.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 186 :   29360128 : layers.20.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 187 :   29360128 : layers.20.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 188 :       8192 : layers.20.attention_norm.weight     : [8192]          : torch.bfloat16
 189 :       8192 : layers.20.ffn_norm.weight           : [8192]          : torch.bfloat16
 190 :    8388608 : layers.21.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 191 :    1048576 : layers.21.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 192 :    1048576 : layers.21.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 193 :    8388608 : layers.21.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 194 :   29360128 : layers.21.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 195 :   29360128 : layers.21.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 196 :   29360128 : layers.21.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 197 :       8192 : layers.21.attention_norm.weight     : [8192]          : torch.bfloat16
 198 :       8192 : layers.21.ffn_norm.weight           : [8192]          : torch.bfloat16
 199 :    8388608 : layers.22.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 200 :    1048576 : layers.22.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 201 :    1048576 : layers.22.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 202 :    8388608 : layers.22.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 203 :   29360128 : layers.22.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 204 :   29360128 : layers.22.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 205 :   29360128 : layers.22.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 206 :       8192 : layers.22.attention_norm.weight     : [8192]          : torch.bfloat16
 207 :       8192 : layers.22.ffn_norm.weight           : [8192]          : torch.bfloat16
 208 :    8388608 : layers.23.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 209 :    1048576 : layers.23.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 210 :    1048576 : layers.23.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 211 :    8388608 : layers.23.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 212 :   29360128 : layers.23.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 213 :   29360128 : layers.23.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 214 :   29360128 : layers.23.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 215 :       8192 : layers.23.attention_norm.weight     : [8192]          : torch.bfloat16
 216 :       8192 : layers.23.ffn_norm.weight           : [8192]          : torch.bfloat16
 217 :    8388608 : layers.24.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 218 :    1048576 : layers.24.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 219 :    1048576 : layers.24.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 220 :    8388608 : layers.24.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 221 :   29360128 : layers.24.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 222 :   29360128 : layers.24.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 223 :   29360128 : layers.24.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 224 :       8192 : layers.24.attention_norm.weight     : [8192]          : torch.bfloat16
 225 :       8192 : layers.24.ffn_norm.weight           : [8192]          : torch.bfloat16
 226 :    8388608 : layers.25.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 227 :    1048576 : layers.25.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 228 :    1048576 : layers.25.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 229 :    8388608 : layers.25.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 230 :   29360128 : layers.25.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 231 :   29360128 : layers.25.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 232 :   29360128 : layers.25.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 233 :       8192 : layers.25.attention_norm.weight     : [8192]          : torch.bfloat16
 234 :       8192 : layers.25.ffn_norm.weight           : [8192]          : torch.bfloat16
 235 :    8388608 : layers.26.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 236 :    1048576 : layers.26.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 237 :    1048576 : layers.26.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 238 :    8388608 : layers.26.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 239 :   29360128 : layers.26.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 240 :   29360128 : layers.26.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 241 :   29360128 : layers.26.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 242 :       8192 : layers.26.attention_norm.weight     : [8192]          : torch.bfloat16
 243 :       8192 : layers.26.ffn_norm.weight           : [8192]          : torch.bfloat16
 244 :    8388608 : layers.27.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 245 :    1048576 : layers.27.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 246 :    1048576 : layers.27.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 247 :    8388608 : layers.27.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 248 :   29360128 : layers.27.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 249 :   29360128 : layers.27.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 250 :   29360128 : layers.27.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 251 :       8192 : layers.27.attention_norm.weight     : [8192]          : torch.bfloat16
 252 :       8192 : layers.27.ffn_norm.weight           : [8192]          : torch.bfloat16
 253 :    8388608 : layers.28.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 254 :    1048576 : layers.28.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 255 :    1048576 : layers.28.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 256 :    8388608 : layers.28.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 257 :   29360128 : layers.28.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 258 :   29360128 : layers.28.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 259 :   29360128 : layers.28.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 260 :       8192 : layers.28.attention_norm.weight     : [8192]          : torch.bfloat16
 261 :       8192 : layers.28.ffn_norm.weight           : [8192]          : torch.bfloat16
 262 :    8388608 : layers.29.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 263 :    1048576 : layers.29.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 264 :    1048576 : layers.29.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 265 :    8388608 : layers.29.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 266 :   29360128 : layers.29.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 267 :   29360128 : layers.29.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 268 :   29360128 : layers.29.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 269 :       8192 : layers.29.attention_norm.weight     : [8192]          : torch.bfloat16
 270 :       8192 : layers.29.ffn_norm.weight           : [8192]          : torch.bfloat16
 271 :    8388608 : layers.30.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 272 :    1048576 : layers.30.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 273 :    1048576 : layers.30.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 274 :    8388608 : layers.30.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 275 :   29360128 : layers.30.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 276 :   29360128 : layers.30.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 277 :   29360128 : layers.30.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 278 :       8192 : layers.30.attention_norm.weight     : [8192]          : torch.bfloat16
 279 :       8192 : layers.30.ffn_norm.weight           : [8192]          : torch.bfloat16
 280 :    8388608 : layers.31.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 281 :    1048576 : layers.31.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 282 :    1048576 : layers.31.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 283 :    8388608 : layers.31.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 284 :   29360128 : layers.31.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 285 :   29360128 : layers.31.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 286 :   29360128 : layers.31.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 287 :       8192 : layers.31.attention_norm.weight     : [8192]          : torch.bfloat16
 288 :       8192 : layers.31.ffn_norm.weight           : [8192]          : torch.bfloat16
 289 :    8388608 : layers.32.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 290 :    1048576 : layers.32.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 291 :    1048576 : layers.32.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 292 :    8388608 : layers.32.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 293 :   29360128 : layers.32.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 294 :   29360128 : layers.32.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 295 :   29360128 : layers.32.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 296 :       8192 : layers.32.attention_norm.weight     : [8192]          : torch.bfloat16
 297 :       8192 : layers.32.ffn_norm.weight           : [8192]          : torch.bfloat16
 298 :    8388608 : layers.33.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 299 :    1048576 : layers.33.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 300 :    1048576 : layers.33.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 301 :    8388608 : layers.33.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 302 :   29360128 : layers.33.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 303 :   29360128 : layers.33.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 304 :   29360128 : layers.33.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 305 :       8192 : layers.33.attention_norm.weight     : [8192]          : torch.bfloat16
 306 :       8192 : layers.33.ffn_norm.weight           : [8192]          : torch.bfloat16
 307 :    8388608 : layers.34.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 308 :    1048576 : layers.34.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 309 :    1048576 : layers.34.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 310 :    8388608 : layers.34.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 311 :   29360128 : layers.34.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 312 :   29360128 : layers.34.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 313 :   29360128 : layers.34.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 314 :       8192 : layers.34.attention_norm.weight     : [8192]          : torch.bfloat16
 315 :       8192 : layers.34.ffn_norm.weight           : [8192]          : torch.bfloat16
 316 :    8388608 : layers.35.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 317 :    1048576 : layers.35.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 318 :    1048576 : layers.35.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 319 :    8388608 : layers.35.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 320 :   29360128 : layers.35.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 321 :   29360128 : layers.35.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 322 :   29360128 : layers.35.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 323 :       8192 : layers.35.attention_norm.weight     : [8192]          : torch.bfloat16
 324 :       8192 : layers.35.ffn_norm.weight           : [8192]          : torch.bfloat16
 325 :    8388608 : layers.36.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 326 :    1048576 : layers.36.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 327 :    1048576 : layers.36.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 328 :    8388608 : layers.36.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 329 :   29360128 : layers.36.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 330 :   29360128 : layers.36.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 331 :   29360128 : layers.36.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 332 :       8192 : layers.36.attention_norm.weight     : [8192]          : torch.bfloat16
 333 :       8192 : layers.36.ffn_norm.weight           : [8192]          : torch.bfloat16
 334 :    8388608 : layers.37.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 335 :    1048576 : layers.37.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 336 :    1048576 : layers.37.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 337 :    8388608 : layers.37.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 338 :   29360128 : layers.37.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 339 :   29360128 : layers.37.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 340 :   29360128 : layers.37.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 341 :       8192 : layers.37.attention_norm.weight     : [8192]          : torch.bfloat16
 342 :       8192 : layers.37.ffn_norm.weight           : [8192]          : torch.bfloat16
 343 :    8388608 : layers.38.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 344 :    1048576 : layers.38.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 345 :    1048576 : layers.38.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 346 :    8388608 : layers.38.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 347 :   29360128 : layers.38.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 348 :   29360128 : layers.38.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 349 :   29360128 : layers.38.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 350 :       8192 : layers.38.attention_norm.weight     : [8192]          : torch.bfloat16
 351 :       8192 : layers.38.ffn_norm.weight           : [8192]          : torch.bfloat16
 352 :    8388608 : layers.39.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 353 :    1048576 : layers.39.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 354 :    1048576 : layers.39.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 355 :    8388608 : layers.39.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 356 :   29360128 : layers.39.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 357 :   29360128 : layers.39.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 358 :   29360128 : layers.39.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 359 :       8192 : layers.39.attention_norm.weight     : [8192]          : torch.bfloat16
 360 :       8192 : layers.39.ffn_norm.weight           : [8192]          : torch.bfloat16
 361 :    8388608 : layers.40.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 362 :    1048576 : layers.40.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 363 :    1048576 : layers.40.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 364 :    8388608 : layers.40.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 365 :   29360128 : layers.40.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 366 :   29360128 : layers.40.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 367 :   29360128 : layers.40.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 368 :       8192 : layers.40.attention_norm.weight     : [8192]          : torch.bfloat16
 369 :       8192 : layers.40.ffn_norm.weight           : [8192]          : torch.bfloat16
 370 :    8388608 : layers.41.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 371 :    1048576 : layers.41.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 372 :    1048576 : layers.41.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 373 :    8388608 : layers.41.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 374 :   29360128 : layers.41.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 375 :   29360128 : layers.41.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 376 :   29360128 : layers.41.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 377 :       8192 : layers.41.attention_norm.weight     : [8192]          : torch.bfloat16
 378 :       8192 : layers.41.ffn_norm.weight           : [8192]          : torch.bfloat16
 379 :    8388608 : layers.42.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 380 :    1048576 : layers.42.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 381 :    1048576 : layers.42.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 382 :    8388608 : layers.42.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 383 :   29360128 : layers.42.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 384 :   29360128 : layers.42.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 385 :   29360128 : layers.42.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 386 :       8192 : layers.42.attention_norm.weight     : [8192]          : torch.bfloat16
 387 :       8192 : layers.42.ffn_norm.weight           : [8192]          : torch.bfloat16
 388 :    8388608 : layers.43.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 389 :    1048576 : layers.43.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 390 :    1048576 : layers.43.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 391 :    8388608 : layers.43.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 392 :   29360128 : layers.43.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 393 :   29360128 : layers.43.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 394 :   29360128 : layers.43.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 395 :       8192 : layers.43.attention_norm.weight     : [8192]          : torch.bfloat16
 396 :       8192 : layers.43.ffn_norm.weight           : [8192]          : torch.bfloat16
 397 :    8388608 : layers.44.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 398 :    1048576 : layers.44.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 399 :    1048576 : layers.44.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 400 :    8388608 : layers.44.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 401 :   29360128 : layers.44.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 402 :   29360128 : layers.44.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 403 :   29360128 : layers.44.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 404 :       8192 : layers.44.attention_norm.weight     : [8192]          : torch.bfloat16
 405 :       8192 : layers.44.ffn_norm.weight           : [8192]          : torch.bfloat16
 406 :    8388608 : layers.45.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 407 :    1048576 : layers.45.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 408 :    1048576 : layers.45.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 409 :    8388608 : layers.45.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 410 :   29360128 : layers.45.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 411 :   29360128 : layers.45.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 412 :   29360128 : layers.45.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 413 :       8192 : layers.45.attention_norm.weight     : [8192]          : torch.bfloat16
 414 :       8192 : layers.45.ffn_norm.weight           : [8192]          : torch.bfloat16
 415 :    8388608 : layers.46.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 416 :    1048576 : layers.46.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 417 :    1048576 : layers.46.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 418 :    8388608 : layers.46.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 419 :   29360128 : layers.46.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 420 :   29360128 : layers.46.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 421 :   29360128 : layers.46.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 422 :       8192 : layers.46.attention_norm.weight     : [8192]          : torch.bfloat16
 423 :       8192 : layers.46.ffn_norm.weight           : [8192]          : torch.bfloat16
 424 :    8388608 : layers.47.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 425 :    1048576 : layers.47.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 426 :    1048576 : layers.47.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 427 :    8388608 : layers.47.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 428 :   29360128 : layers.47.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 429 :   29360128 : layers.47.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 430 :   29360128 : layers.47.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 431 :       8192 : layers.47.attention_norm.weight     : [8192]          : torch.bfloat16
 432 :       8192 : layers.47.ffn_norm.weight           : [8192]          : torch.bfloat16
 433 :    8388608 : layers.48.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 434 :    1048576 : layers.48.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 435 :    1048576 : layers.48.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 436 :    8388608 : layers.48.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 437 :   29360128 : layers.48.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 438 :   29360128 : layers.48.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 439 :   29360128 : layers.48.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 440 :       8192 : layers.48.attention_norm.weight     : [8192]          : torch.bfloat16
 441 :       8192 : layers.48.ffn_norm.weight           : [8192]          : torch.bfloat16
 442 :    8388608 : layers.49.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 443 :    1048576 : layers.49.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 444 :    1048576 : layers.49.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 445 :    8388608 : layers.49.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 446 :   29360128 : layers.49.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 447 :   29360128 : layers.49.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 448 :   29360128 : layers.49.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 449 :       8192 : layers.49.attention_norm.weight     : [8192]          : torch.bfloat16
 450 :       8192 : layers.49.ffn_norm.weight           : [8192]          : torch.bfloat16
 451 :    8388608 : layers.50.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 452 :    1048576 : layers.50.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 453 :    1048576 : layers.50.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 454 :    8388608 : layers.50.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 455 :   29360128 : layers.50.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 456 :   29360128 : layers.50.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 457 :   29360128 : layers.50.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 458 :       8192 : layers.50.attention_norm.weight     : [8192]          : torch.bfloat16
 459 :       8192 : layers.50.ffn_norm.weight           : [8192]          : torch.bfloat16
 460 :    8388608 : layers.51.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 461 :    1048576 : layers.51.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 462 :    1048576 : layers.51.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 463 :    8388608 : layers.51.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 464 :   29360128 : layers.51.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 465 :   29360128 : layers.51.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 466 :   29360128 : layers.51.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 467 :       8192 : layers.51.attention_norm.weight     : [8192]          : torch.bfloat16
 468 :       8192 : layers.51.ffn_norm.weight           : [8192]          : torch.bfloat16
 469 :    8388608 : layers.52.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 470 :    1048576 : layers.52.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 471 :    1048576 : layers.52.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 472 :    8388608 : layers.52.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 473 :   29360128 : layers.52.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 474 :   29360128 : layers.52.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 475 :   29360128 : layers.52.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 476 :       8192 : layers.52.attention_norm.weight     : [8192]          : torch.bfloat16
 477 :       8192 : layers.52.ffn_norm.weight           : [8192]          : torch.bfloat16
 478 :    8388608 : layers.53.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 479 :    1048576 : layers.53.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 480 :    1048576 : layers.53.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 481 :    8388608 : layers.53.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 482 :   29360128 : layers.53.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 483 :   29360128 : layers.53.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 484 :   29360128 : layers.53.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 485 :       8192 : layers.53.attention_norm.weight     : [8192]          : torch.bfloat16
 486 :       8192 : layers.53.ffn_norm.weight           : [8192]          : torch.bfloat16
 487 :    8388608 : layers.54.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 488 :    1048576 : layers.54.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 489 :    1048576 : layers.54.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 490 :    8388608 : layers.54.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 491 :   29360128 : layers.54.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 492 :   29360128 : layers.54.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 493 :   29360128 : layers.54.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 494 :       8192 : layers.54.attention_norm.weight     : [8192]          : torch.bfloat16
 495 :       8192 : layers.54.ffn_norm.weight           : [8192]          : torch.bfloat16
 496 :    8388608 : layers.55.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 497 :    1048576 : layers.55.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 498 :    1048576 : layers.55.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 499 :    8388608 : layers.55.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 500 :   29360128 : layers.55.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 501 :   29360128 : layers.55.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 502 :   29360128 : layers.55.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 503 :       8192 : layers.55.attention_norm.weight     : [8192]          : torch.bfloat16
 504 :       8192 : layers.55.ffn_norm.weight           : [8192]          : torch.bfloat16
 505 :    8388608 : layers.56.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 506 :    1048576 : layers.56.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 507 :    1048576 : layers.56.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 508 :    8388608 : layers.56.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 509 :   29360128 : layers.56.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 510 :   29360128 : layers.56.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 511 :   29360128 : layers.56.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 512 :       8192 : layers.56.attention_norm.weight     : [8192]          : torch.bfloat16
 513 :       8192 : layers.56.ffn_norm.weight           : [8192]          : torch.bfloat16
 514 :    8388608 : layers.57.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 515 :    1048576 : layers.57.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 516 :    1048576 : layers.57.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 517 :    8388608 : layers.57.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 518 :   29360128 : layers.57.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 519 :   29360128 : layers.57.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 520 :   29360128 : layers.57.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 521 :       8192 : layers.57.attention_norm.weight     : [8192]          : torch.bfloat16
 522 :       8192 : layers.57.ffn_norm.weight           : [8192]          : torch.bfloat16
 523 :    8388608 : layers.58.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 524 :    1048576 : layers.58.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 525 :    1048576 : layers.58.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 526 :    8388608 : layers.58.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 527 :   29360128 : layers.58.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 528 :   29360128 : layers.58.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 529 :   29360128 : layers.58.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 530 :       8192 : layers.58.attention_norm.weight     : [8192]          : torch.bfloat16
 531 :       8192 : layers.58.ffn_norm.weight           : [8192]          : torch.bfloat16
 532 :    8388608 : layers.59.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 533 :    1048576 : layers.59.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 534 :    1048576 : layers.59.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 535 :    8388608 : layers.59.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 536 :   29360128 : layers.59.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 537 :   29360128 : layers.59.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 538 :   29360128 : layers.59.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 539 :       8192 : layers.59.attention_norm.weight     : [8192]          : torch.bfloat16
 540 :       8192 : layers.59.ffn_norm.weight           : [8192]          : torch.bfloat16
 541 :    8388608 : layers.60.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 542 :    1048576 : layers.60.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 543 :    1048576 : layers.60.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 544 :    8388608 : layers.60.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 545 :   29360128 : layers.60.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 546 :   29360128 : layers.60.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 547 :   29360128 : layers.60.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 548 :       8192 : layers.60.attention_norm.weight     : [8192]          : torch.bfloat16
 549 :       8192 : layers.60.ffn_norm.weight           : [8192]          : torch.bfloat16
 550 :    8388608 : layers.61.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 551 :    1048576 : layers.61.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 552 :    1048576 : layers.61.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 553 :    8388608 : layers.61.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 554 :   29360128 : layers.61.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 555 :   29360128 : layers.61.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 556 :   29360128 : layers.61.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 557 :       8192 : layers.61.attention_norm.weight     : [8192]          : torch.bfloat16
 558 :       8192 : layers.61.ffn_norm.weight           : [8192]          : torch.bfloat16
 559 :    8388608 : layers.62.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 560 :    1048576 : layers.62.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 561 :    1048576 : layers.62.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 562 :    8388608 : layers.62.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 563 :   29360128 : layers.62.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 564 :   29360128 : layers.62.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 565 :   29360128 : layers.62.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 566 :       8192 : layers.62.attention_norm.weight     : [8192]          : torch.bfloat16
 567 :       8192 : layers.62.ffn_norm.weight           : [8192]          : torch.bfloat16
 568 :    8388608 : layers.63.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 569 :    1048576 : layers.63.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 570 :    1048576 : layers.63.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 571 :    8388608 : layers.63.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 572 :   29360128 : layers.63.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 573 :   29360128 : layers.63.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 574 :   29360128 : layers.63.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 575 :       8192 : layers.63.attention_norm.weight     : [8192]          : torch.bfloat16
 576 :       8192 : layers.63.ffn_norm.weight           : [8192]          : torch.bfloat16
 577 :    8388608 : layers.64.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 578 :    1048576 : layers.64.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 579 :    1048576 : layers.64.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 580 :    8388608 : layers.64.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 581 :   29360128 : layers.64.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 582 :   29360128 : layers.64.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 583 :   29360128 : layers.64.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 584 :       8192 : layers.64.attention_norm.weight     : [8192]          : torch.bfloat16
 585 :       8192 : layers.64.ffn_norm.weight           : [8192]          : torch.bfloat16
 586 :    8388608 : layers.65.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 587 :    1048576 : layers.65.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 588 :    1048576 : layers.65.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 589 :    8388608 : layers.65.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 590 :   29360128 : layers.65.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 591 :   29360128 : layers.65.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 592 :   29360128 : layers.65.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 593 :       8192 : layers.65.attention_norm.weight     : [8192]          : torch.bfloat16
 594 :       8192 : layers.65.ffn_norm.weight           : [8192]          : torch.bfloat16
 595 :    8388608 : layers.66.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 596 :    1048576 : layers.66.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 597 :    1048576 : layers.66.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 598 :    8388608 : layers.66.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 599 :   29360128 : layers.66.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 600 :   29360128 : layers.66.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 601 :   29360128 : layers.66.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 602 :       8192 : layers.66.attention_norm.weight     : [8192]          : torch.bfloat16
 603 :       8192 : layers.66.ffn_norm.weight           : [8192]          : torch.bfloat16
 604 :    8388608 : layers.67.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 605 :    1048576 : layers.67.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 606 :    1048576 : layers.67.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 607 :    8388608 : layers.67.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 608 :   29360128 : layers.67.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 609 :   29360128 : layers.67.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 610 :   29360128 : layers.67.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 611 :       8192 : layers.67.attention_norm.weight     : [8192]          : torch.bfloat16
 612 :       8192 : layers.67.ffn_norm.weight           : [8192]          : torch.bfloat16
 613 :    8388608 : layers.68.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 614 :    1048576 : layers.68.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 615 :    1048576 : layers.68.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 616 :    8388608 : layers.68.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 617 :   29360128 : layers.68.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 618 :   29360128 : layers.68.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 619 :   29360128 : layers.68.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 620 :       8192 : layers.68.attention_norm.weight     : [8192]          : torch.bfloat16
 621 :       8192 : layers.68.ffn_norm.weight           : [8192]          : torch.bfloat16
 622 :    8388608 : layers.69.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 623 :    1048576 : layers.69.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 624 :    1048576 : layers.69.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 625 :    8388608 : layers.69.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 626 :   29360128 : layers.69.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 627 :   29360128 : layers.69.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 628 :   29360128 : layers.69.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 629 :       8192 : layers.69.attention_norm.weight     : [8192]          : torch.bfloat16
 630 :       8192 : layers.69.ffn_norm.weight           : [8192]          : torch.bfloat16
 631 :    8388608 : layers.70.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 632 :    1048576 : layers.70.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 633 :    1048576 : layers.70.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 634 :    8388608 : layers.70.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 635 :   29360128 : layers.70.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 636 :   29360128 : layers.70.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 637 :   29360128 : layers.70.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 638 :       8192 : layers.70.attention_norm.weight     : [8192]          : torch.bfloat16
 639 :       8192 : layers.70.ffn_norm.weight           : [8192]          : torch.bfloat16
 640 :    8388608 : layers.71.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 641 :    1048576 : layers.71.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 642 :    1048576 : layers.71.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 643 :    8388608 : layers.71.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 644 :   29360128 : layers.71.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 645 :   29360128 : layers.71.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 646 :   29360128 : layers.71.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 647 :       8192 : layers.71.attention_norm.weight     : [8192]          : torch.bfloat16
 648 :       8192 : layers.71.ffn_norm.weight           : [8192]          : torch.bfloat16
 649 :    8388608 : layers.72.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 650 :    1048576 : layers.72.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 651 :    1048576 : layers.72.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 652 :    8388608 : layers.72.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 653 :   29360128 : layers.72.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 654 :   29360128 : layers.72.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 655 :   29360128 : layers.72.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 656 :       8192 : layers.72.attention_norm.weight     : [8192]          : torch.bfloat16
 657 :       8192 : layers.72.ffn_norm.weight           : [8192]          : torch.bfloat16
 658 :    8388608 : layers.73.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 659 :    1048576 : layers.73.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 660 :    1048576 : layers.73.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 661 :    8388608 : layers.73.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 662 :   29360128 : layers.73.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 663 :   29360128 : layers.73.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 664 :   29360128 : layers.73.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 665 :       8192 : layers.73.attention_norm.weight     : [8192]          : torch.bfloat16
 666 :       8192 : layers.73.ffn_norm.weight           : [8192]          : torch.bfloat16
 667 :    8388608 : layers.74.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 668 :    1048576 : layers.74.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 669 :    1048576 : layers.74.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 670 :    8388608 : layers.74.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 671 :   29360128 : layers.74.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 672 :   29360128 : layers.74.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 673 :   29360128 : layers.74.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 674 :       8192 : layers.74.attention_norm.weight     : [8192]          : torch.bfloat16
 675 :       8192 : layers.74.ffn_norm.weight           : [8192]          : torch.bfloat16
 676 :    8388608 : layers.75.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 677 :    1048576 : layers.75.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 678 :    1048576 : layers.75.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 679 :    8388608 : layers.75.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 680 :   29360128 : layers.75.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 681 :   29360128 : layers.75.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 682 :   29360128 : layers.75.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 683 :       8192 : layers.75.attention_norm.weight     : [8192]          : torch.bfloat16
 684 :       8192 : layers.75.ffn_norm.weight           : [8192]          : torch.bfloat16
 685 :    8388608 : layers.76.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 686 :    1048576 : layers.76.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 687 :    1048576 : layers.76.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 688 :    8388608 : layers.76.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 689 :   29360128 : layers.76.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 690 :   29360128 : layers.76.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 691 :   29360128 : layers.76.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 692 :       8192 : layers.76.attention_norm.weight     : [8192]          : torch.bfloat16
 693 :       8192 : layers.76.ffn_norm.weight           : [8192]          : torch.bfloat16
 694 :    8388608 : layers.77.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 695 :    1048576 : layers.77.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 696 :    1048576 : layers.77.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 697 :    8388608 : layers.77.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 698 :   29360128 : layers.77.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 699 :   29360128 : layers.77.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 700 :   29360128 : layers.77.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 701 :       8192 : layers.77.attention_norm.weight     : [8192]          : torch.bfloat16
 702 :       8192 : layers.77.ffn_norm.weight           : [8192]          : torch.bfloat16
 703 :    8388608 : layers.78.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 704 :    1048576 : layers.78.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 705 :    1048576 : layers.78.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 706 :    8388608 : layers.78.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 707 :   29360128 : layers.78.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 708 :   29360128 : layers.78.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 709 :   29360128 : layers.78.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 710 :       8192 : layers.78.attention_norm.weight     : [8192]          : torch.bfloat16
 711 :       8192 : layers.78.ffn_norm.weight           : [8192]          : torch.bfloat16
 712 :    8388608 : layers.79.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 713 :    1048576 : layers.79.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 714 :    1048576 : layers.79.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 715 :    8388608 : layers.79.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 716 :   29360128 : layers.79.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 717 :   29360128 : layers.79.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 718 :   29360128 : layers.79.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 719 :       8192 : layers.79.attention_norm.weight     : [8192]          : torch.bfloat16
 720 :       8192 : layers.79.ffn_norm.weight           : [8192]          : torch.bfloat16
 721 :       8192 : norm.weight                         : [8192]          : torch.bfloat16
 722 :  131334144 : output.weight                       : [16032, 8192]   : torch.bfloat16

-----------------------------------------------------------------------------
26461102080 params in total.
52922204160 bytes in total.
18.55 sec, 6.39 sec, 2721.17 MB/s
-----------------------------------------------------------------------------

[4/8]: Loading original/consolidated.03.pth
   0 :  131334144 : tok_embeddings.weight               : [16032, 8192]   : torch.bfloat16
   1 :    8388608 : layers.0.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
   2 :    1048576 : layers.0.attention.wk.weight        : [128, 8192]     : torch.bfloat16
   3 :    1048576 : layers.0.attention.wv.weight        : [128, 8192]     : torch.bfloat16
   4 :    8388608 : layers.0.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
   5 :   29360128 : layers.0.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
   6 :   29360128 : layers.0.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
   7 :   29360128 : layers.0.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
   8 :       8192 : layers.0.attention_norm.weight      : [8192]          : torch.bfloat16
   9 :       8192 : layers.0.ffn_norm.weight            : [8192]          : torch.bfloat16
  10 :    8388608 : layers.1.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  11 :    1048576 : layers.1.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  12 :    1048576 : layers.1.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  13 :    8388608 : layers.1.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  14 :   29360128 : layers.1.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  15 :   29360128 : layers.1.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  16 :   29360128 : layers.1.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  17 :       8192 : layers.1.attention_norm.weight      : [8192]          : torch.bfloat16
  18 :       8192 : layers.1.ffn_norm.weight            : [8192]          : torch.bfloat16
  19 :    8388608 : layers.2.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  20 :    1048576 : layers.2.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  21 :    1048576 : layers.2.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  22 :    8388608 : layers.2.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  23 :   29360128 : layers.2.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  24 :   29360128 : layers.2.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  25 :   29360128 : layers.2.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  26 :       8192 : layers.2.attention_norm.weight      : [8192]          : torch.bfloat16
  27 :       8192 : layers.2.ffn_norm.weight            : [8192]          : torch.bfloat16
  28 :    8388608 : layers.3.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  29 :    1048576 : layers.3.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  30 :    1048576 : layers.3.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  31 :    8388608 : layers.3.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  32 :   29360128 : layers.3.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  33 :   29360128 : layers.3.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  34 :   29360128 : layers.3.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  35 :       8192 : layers.3.attention_norm.weight      : [8192]          : torch.bfloat16
  36 :       8192 : layers.3.ffn_norm.weight            : [8192]          : torch.bfloat16
  37 :    8388608 : layers.4.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  38 :    1048576 : layers.4.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  39 :    1048576 : layers.4.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  40 :    8388608 : layers.4.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  41 :   29360128 : layers.4.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  42 :   29360128 : layers.4.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  43 :   29360128 : layers.4.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  44 :       8192 : layers.4.attention_norm.weight      : [8192]          : torch.bfloat16
  45 :       8192 : layers.4.ffn_norm.weight            : [8192]          : torch.bfloat16
  46 :    8388608 : layers.5.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  47 :    1048576 : layers.5.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  48 :    1048576 : layers.5.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  49 :    8388608 : layers.5.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  50 :   29360128 : layers.5.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  51 :   29360128 : layers.5.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  52 :   29360128 : layers.5.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  53 :       8192 : layers.5.attention_norm.weight      : [8192]          : torch.bfloat16
  54 :       8192 : layers.5.ffn_norm.weight            : [8192]          : torch.bfloat16
  55 :    8388608 : layers.6.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  56 :    1048576 : layers.6.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  57 :    1048576 : layers.6.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  58 :    8388608 : layers.6.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  59 :   29360128 : layers.6.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  60 :   29360128 : layers.6.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  61 :   29360128 : layers.6.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  62 :       8192 : layers.6.attention_norm.weight      : [8192]          : torch.bfloat16
  63 :       8192 : layers.6.ffn_norm.weight            : [8192]          : torch.bfloat16
  64 :    8388608 : layers.7.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  65 :    1048576 : layers.7.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  66 :    1048576 : layers.7.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  67 :    8388608 : layers.7.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  68 :   29360128 : layers.7.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  69 :   29360128 : layers.7.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  70 :   29360128 : layers.7.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  71 :       8192 : layers.7.attention_norm.weight      : [8192]          : torch.bfloat16
  72 :       8192 : layers.7.ffn_norm.weight            : [8192]          : torch.bfloat16
  73 :    8388608 : layers.8.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  74 :    1048576 : layers.8.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  75 :    1048576 : layers.8.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  76 :    8388608 : layers.8.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  77 :   29360128 : layers.8.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  78 :   29360128 : layers.8.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  79 :   29360128 : layers.8.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  80 :       8192 : layers.8.attention_norm.weight      : [8192]          : torch.bfloat16
  81 :       8192 : layers.8.ffn_norm.weight            : [8192]          : torch.bfloat16
  82 :    8388608 : layers.9.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  83 :    1048576 : layers.9.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  84 :    1048576 : layers.9.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  85 :    8388608 : layers.9.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  86 :   29360128 : layers.9.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  87 :   29360128 : layers.9.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  88 :   29360128 : layers.9.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  89 :       8192 : layers.9.attention_norm.weight      : [8192]          : torch.bfloat16
  90 :       8192 : layers.9.ffn_norm.weight            : [8192]          : torch.bfloat16
  91 :    8388608 : layers.10.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
  92 :    1048576 : layers.10.attention.wk.weight       : [128, 8192]     : torch.bfloat16
  93 :    1048576 : layers.10.attention.wv.weight       : [128, 8192]     : torch.bfloat16
  94 :    8388608 : layers.10.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
  95 :   29360128 : layers.10.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
  96 :   29360128 : layers.10.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
  97 :   29360128 : layers.10.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
  98 :       8192 : layers.10.attention_norm.weight     : [8192]          : torch.bfloat16
  99 :       8192 : layers.10.ffn_norm.weight           : [8192]          : torch.bfloat16
 100 :    8388608 : layers.11.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 101 :    1048576 : layers.11.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 102 :    1048576 : layers.11.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 103 :    8388608 : layers.11.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 104 :   29360128 : layers.11.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 105 :   29360128 : layers.11.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 106 :   29360128 : layers.11.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 107 :       8192 : layers.11.attention_norm.weight     : [8192]          : torch.bfloat16
 108 :       8192 : layers.11.ffn_norm.weight           : [8192]          : torch.bfloat16
 109 :    8388608 : layers.12.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 110 :    1048576 : layers.12.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 111 :    1048576 : layers.12.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 112 :    8388608 : layers.12.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 113 :   29360128 : layers.12.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 114 :   29360128 : layers.12.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 115 :   29360128 : layers.12.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 116 :       8192 : layers.12.attention_norm.weight     : [8192]          : torch.bfloat16
 117 :       8192 : layers.12.ffn_norm.weight           : [8192]          : torch.bfloat16
 118 :    8388608 : layers.13.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 119 :    1048576 : layers.13.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 120 :    1048576 : layers.13.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 121 :    8388608 : layers.13.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 122 :   29360128 : layers.13.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 123 :   29360128 : layers.13.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 124 :   29360128 : layers.13.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 125 :       8192 : layers.13.attention_norm.weight     : [8192]          : torch.bfloat16
 126 :       8192 : layers.13.ffn_norm.weight           : [8192]          : torch.bfloat16
 127 :    8388608 : layers.14.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 128 :    1048576 : layers.14.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 129 :    1048576 : layers.14.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 130 :    8388608 : layers.14.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 131 :   29360128 : layers.14.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 132 :   29360128 : layers.14.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 133 :   29360128 : layers.14.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 134 :       8192 : layers.14.attention_norm.weight     : [8192]          : torch.bfloat16
 135 :       8192 : layers.14.ffn_norm.weight           : [8192]          : torch.bfloat16
 136 :    8388608 : layers.15.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 137 :    1048576 : layers.15.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 138 :    1048576 : layers.15.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 139 :    8388608 : layers.15.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 140 :   29360128 : layers.15.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 141 :   29360128 : layers.15.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 142 :   29360128 : layers.15.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 143 :       8192 : layers.15.attention_norm.weight     : [8192]          : torch.bfloat16
 144 :       8192 : layers.15.ffn_norm.weight           : [8192]          : torch.bfloat16
 145 :    8388608 : layers.16.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 146 :    1048576 : layers.16.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 147 :    1048576 : layers.16.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 148 :    8388608 : layers.16.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 149 :   29360128 : layers.16.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 150 :   29360128 : layers.16.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 151 :   29360128 : layers.16.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 152 :       8192 : layers.16.attention_norm.weight     : [8192]          : torch.bfloat16
 153 :       8192 : layers.16.ffn_norm.weight           : [8192]          : torch.bfloat16
 154 :    8388608 : layers.17.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 155 :    1048576 : layers.17.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 156 :    1048576 : layers.17.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 157 :    8388608 : layers.17.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 158 :   29360128 : layers.17.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 159 :   29360128 : layers.17.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 160 :   29360128 : layers.17.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 161 :       8192 : layers.17.attention_norm.weight     : [8192]          : torch.bfloat16
 162 :       8192 : layers.17.ffn_norm.weight           : [8192]          : torch.bfloat16
 163 :    8388608 : layers.18.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 164 :    1048576 : layers.18.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 165 :    1048576 : layers.18.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 166 :    8388608 : layers.18.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 167 :   29360128 : layers.18.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 168 :   29360128 : layers.18.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 169 :   29360128 : layers.18.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 170 :       8192 : layers.18.attention_norm.weight     : [8192]          : torch.bfloat16
 171 :       8192 : layers.18.ffn_norm.weight           : [8192]          : torch.bfloat16
 172 :    8388608 : layers.19.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 173 :    1048576 : layers.19.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 174 :    1048576 : layers.19.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 175 :    8388608 : layers.19.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 176 :   29360128 : layers.19.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 177 :   29360128 : layers.19.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 178 :   29360128 : layers.19.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 179 :       8192 : layers.19.attention_norm.weight     : [8192]          : torch.bfloat16
 180 :       8192 : layers.19.ffn_norm.weight           : [8192]          : torch.bfloat16
 181 :    8388608 : layers.20.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 182 :    1048576 : layers.20.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 183 :    1048576 : layers.20.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 184 :    8388608 : layers.20.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 185 :   29360128 : layers.20.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 186 :   29360128 : layers.20.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 187 :   29360128 : layers.20.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 188 :       8192 : layers.20.attention_norm.weight     : [8192]          : torch.bfloat16
 189 :       8192 : layers.20.ffn_norm.weight           : [8192]          : torch.bfloat16
 190 :    8388608 : layers.21.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 191 :    1048576 : layers.21.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 192 :    1048576 : layers.21.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 193 :    8388608 : layers.21.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 194 :   29360128 : layers.21.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 195 :   29360128 : layers.21.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 196 :   29360128 : layers.21.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 197 :       8192 : layers.21.attention_norm.weight     : [8192]          : torch.bfloat16
 198 :       8192 : layers.21.ffn_norm.weight           : [8192]          : torch.bfloat16
 199 :    8388608 : layers.22.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 200 :    1048576 : layers.22.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 201 :    1048576 : layers.22.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 202 :    8388608 : layers.22.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 203 :   29360128 : layers.22.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 204 :   29360128 : layers.22.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 205 :   29360128 : layers.22.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 206 :       8192 : layers.22.attention_norm.weight     : [8192]          : torch.bfloat16
 207 :       8192 : layers.22.ffn_norm.weight           : [8192]          : torch.bfloat16
 208 :    8388608 : layers.23.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 209 :    1048576 : layers.23.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 210 :    1048576 : layers.23.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 211 :    8388608 : layers.23.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 212 :   29360128 : layers.23.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 213 :   29360128 : layers.23.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 214 :   29360128 : layers.23.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 215 :       8192 : layers.23.attention_norm.weight     : [8192]          : torch.bfloat16
 216 :       8192 : layers.23.ffn_norm.weight           : [8192]          : torch.bfloat16
 217 :    8388608 : layers.24.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 218 :    1048576 : layers.24.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 219 :    1048576 : layers.24.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 220 :    8388608 : layers.24.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 221 :   29360128 : layers.24.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 222 :   29360128 : layers.24.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 223 :   29360128 : layers.24.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 224 :       8192 : layers.24.attention_norm.weight     : [8192]          : torch.bfloat16
 225 :       8192 : layers.24.ffn_norm.weight           : [8192]          : torch.bfloat16
 226 :    8388608 : layers.25.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 227 :    1048576 : layers.25.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 228 :    1048576 : layers.25.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 229 :    8388608 : layers.25.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 230 :   29360128 : layers.25.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 231 :   29360128 : layers.25.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 232 :   29360128 : layers.25.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 233 :       8192 : layers.25.attention_norm.weight     : [8192]          : torch.bfloat16
 234 :       8192 : layers.25.ffn_norm.weight           : [8192]          : torch.bfloat16
 235 :    8388608 : layers.26.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 236 :    1048576 : layers.26.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 237 :    1048576 : layers.26.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 238 :    8388608 : layers.26.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 239 :   29360128 : layers.26.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 240 :   29360128 : layers.26.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 241 :   29360128 : layers.26.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 242 :       8192 : layers.26.attention_norm.weight     : [8192]          : torch.bfloat16
 243 :       8192 : layers.26.ffn_norm.weight           : [8192]          : torch.bfloat16
 244 :    8388608 : layers.27.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 245 :    1048576 : layers.27.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 246 :    1048576 : layers.27.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 247 :    8388608 : layers.27.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 248 :   29360128 : layers.27.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 249 :   29360128 : layers.27.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 250 :   29360128 : layers.27.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 251 :       8192 : layers.27.attention_norm.weight     : [8192]          : torch.bfloat16
 252 :       8192 : layers.27.ffn_norm.weight           : [8192]          : torch.bfloat16
 253 :    8388608 : layers.28.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 254 :    1048576 : layers.28.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 255 :    1048576 : layers.28.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 256 :    8388608 : layers.28.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 257 :   29360128 : layers.28.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 258 :   29360128 : layers.28.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 259 :   29360128 : layers.28.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 260 :       8192 : layers.28.attention_norm.weight     : [8192]          : torch.bfloat16
 261 :       8192 : layers.28.ffn_norm.weight           : [8192]          : torch.bfloat16
 262 :    8388608 : layers.29.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 263 :    1048576 : layers.29.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 264 :    1048576 : layers.29.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 265 :    8388608 : layers.29.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 266 :   29360128 : layers.29.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 267 :   29360128 : layers.29.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 268 :   29360128 : layers.29.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 269 :       8192 : layers.29.attention_norm.weight     : [8192]          : torch.bfloat16
 270 :       8192 : layers.29.ffn_norm.weight           : [8192]          : torch.bfloat16
 271 :    8388608 : layers.30.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 272 :    1048576 : layers.30.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 273 :    1048576 : layers.30.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 274 :    8388608 : layers.30.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 275 :   29360128 : layers.30.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 276 :   29360128 : layers.30.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 277 :   29360128 : layers.30.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 278 :       8192 : layers.30.attention_norm.weight     : [8192]          : torch.bfloat16
 279 :       8192 : layers.30.ffn_norm.weight           : [8192]          : torch.bfloat16
 280 :    8388608 : layers.31.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 281 :    1048576 : layers.31.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 282 :    1048576 : layers.31.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 283 :    8388608 : layers.31.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 284 :   29360128 : layers.31.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 285 :   29360128 : layers.31.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 286 :   29360128 : layers.31.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 287 :       8192 : layers.31.attention_norm.weight     : [8192]          : torch.bfloat16
 288 :       8192 : layers.31.ffn_norm.weight           : [8192]          : torch.bfloat16
 289 :    8388608 : layers.32.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 290 :    1048576 : layers.32.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 291 :    1048576 : layers.32.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 292 :    8388608 : layers.32.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 293 :   29360128 : layers.32.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 294 :   29360128 : layers.32.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 295 :   29360128 : layers.32.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 296 :       8192 : layers.32.attention_norm.weight     : [8192]          : torch.bfloat16
 297 :       8192 : layers.32.ffn_norm.weight           : [8192]          : torch.bfloat16
 298 :    8388608 : layers.33.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 299 :    1048576 : layers.33.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 300 :    1048576 : layers.33.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 301 :    8388608 : layers.33.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 302 :   29360128 : layers.33.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 303 :   29360128 : layers.33.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 304 :   29360128 : layers.33.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 305 :       8192 : layers.33.attention_norm.weight     : [8192]          : torch.bfloat16
 306 :       8192 : layers.33.ffn_norm.weight           : [8192]          : torch.bfloat16
 307 :    8388608 : layers.34.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 308 :    1048576 : layers.34.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 309 :    1048576 : layers.34.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 310 :    8388608 : layers.34.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 311 :   29360128 : layers.34.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 312 :   29360128 : layers.34.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 313 :   29360128 : layers.34.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 314 :       8192 : layers.34.attention_norm.weight     : [8192]          : torch.bfloat16
 315 :       8192 : layers.34.ffn_norm.weight           : [8192]          : torch.bfloat16
 316 :    8388608 : layers.35.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 317 :    1048576 : layers.35.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 318 :    1048576 : layers.35.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 319 :    8388608 : layers.35.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 320 :   29360128 : layers.35.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 321 :   29360128 : layers.35.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 322 :   29360128 : layers.35.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 323 :       8192 : layers.35.attention_norm.weight     : [8192]          : torch.bfloat16
 324 :       8192 : layers.35.ffn_norm.weight           : [8192]          : torch.bfloat16
 325 :    8388608 : layers.36.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 326 :    1048576 : layers.36.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 327 :    1048576 : layers.36.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 328 :    8388608 : layers.36.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 329 :   29360128 : layers.36.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 330 :   29360128 : layers.36.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 331 :   29360128 : layers.36.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 332 :       8192 : layers.36.attention_norm.weight     : [8192]          : torch.bfloat16
 333 :       8192 : layers.36.ffn_norm.weight           : [8192]          : torch.bfloat16
 334 :    8388608 : layers.37.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 335 :    1048576 : layers.37.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 336 :    1048576 : layers.37.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 337 :    8388608 : layers.37.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 338 :   29360128 : layers.37.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 339 :   29360128 : layers.37.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 340 :   29360128 : layers.37.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 341 :       8192 : layers.37.attention_norm.weight     : [8192]          : torch.bfloat16
 342 :       8192 : layers.37.ffn_norm.weight           : [8192]          : torch.bfloat16
 343 :    8388608 : layers.38.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 344 :    1048576 : layers.38.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 345 :    1048576 : layers.38.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 346 :    8388608 : layers.38.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 347 :   29360128 : layers.38.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 348 :   29360128 : layers.38.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 349 :   29360128 : layers.38.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 350 :       8192 : layers.38.attention_norm.weight     : [8192]          : torch.bfloat16
 351 :       8192 : layers.38.ffn_norm.weight           : [8192]          : torch.bfloat16
 352 :    8388608 : layers.39.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 353 :    1048576 : layers.39.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 354 :    1048576 : layers.39.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 355 :    8388608 : layers.39.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 356 :   29360128 : layers.39.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 357 :   29360128 : layers.39.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 358 :   29360128 : layers.39.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 359 :       8192 : layers.39.attention_norm.weight     : [8192]          : torch.bfloat16
 360 :       8192 : layers.39.ffn_norm.weight           : [8192]          : torch.bfloat16
 361 :    8388608 : layers.40.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 362 :    1048576 : layers.40.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 363 :    1048576 : layers.40.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 364 :    8388608 : layers.40.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 365 :   29360128 : layers.40.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 366 :   29360128 : layers.40.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 367 :   29360128 : layers.40.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 368 :       8192 : layers.40.attention_norm.weight     : [8192]          : torch.bfloat16
 369 :       8192 : layers.40.ffn_norm.weight           : [8192]          : torch.bfloat16
 370 :    8388608 : layers.41.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 371 :    1048576 : layers.41.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 372 :    1048576 : layers.41.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 373 :    8388608 : layers.41.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 374 :   29360128 : layers.41.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 375 :   29360128 : layers.41.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 376 :   29360128 : layers.41.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 377 :       8192 : layers.41.attention_norm.weight     : [8192]          : torch.bfloat16
 378 :       8192 : layers.41.ffn_norm.weight           : [8192]          : torch.bfloat16
 379 :    8388608 : layers.42.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 380 :    1048576 : layers.42.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 381 :    1048576 : layers.42.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 382 :    8388608 : layers.42.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 383 :   29360128 : layers.42.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 384 :   29360128 : layers.42.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 385 :   29360128 : layers.42.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 386 :       8192 : layers.42.attention_norm.weight     : [8192]          : torch.bfloat16
 387 :       8192 : layers.42.ffn_norm.weight           : [8192]          : torch.bfloat16
 388 :    8388608 : layers.43.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 389 :    1048576 : layers.43.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 390 :    1048576 : layers.43.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 391 :    8388608 : layers.43.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 392 :   29360128 : layers.43.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 393 :   29360128 : layers.43.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 394 :   29360128 : layers.43.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 395 :       8192 : layers.43.attention_norm.weight     : [8192]          : torch.bfloat16
 396 :       8192 : layers.43.ffn_norm.weight           : [8192]          : torch.bfloat16
 397 :    8388608 : layers.44.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 398 :    1048576 : layers.44.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 399 :    1048576 : layers.44.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 400 :    8388608 : layers.44.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 401 :   29360128 : layers.44.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 402 :   29360128 : layers.44.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 403 :   29360128 : layers.44.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 404 :       8192 : layers.44.attention_norm.weight     : [8192]          : torch.bfloat16
 405 :       8192 : layers.44.ffn_norm.weight           : [8192]          : torch.bfloat16
 406 :    8388608 : layers.45.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 407 :    1048576 : layers.45.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 408 :    1048576 : layers.45.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 409 :    8388608 : layers.45.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 410 :   29360128 : layers.45.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 411 :   29360128 : layers.45.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 412 :   29360128 : layers.45.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 413 :       8192 : layers.45.attention_norm.weight     : [8192]          : torch.bfloat16
 414 :       8192 : layers.45.ffn_norm.weight           : [8192]          : torch.bfloat16
 415 :    8388608 : layers.46.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 416 :    1048576 : layers.46.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 417 :    1048576 : layers.46.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 418 :    8388608 : layers.46.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 419 :   29360128 : layers.46.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 420 :   29360128 : layers.46.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 421 :   29360128 : layers.46.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 422 :       8192 : layers.46.attention_norm.weight     : [8192]          : torch.bfloat16
 423 :       8192 : layers.46.ffn_norm.weight           : [8192]          : torch.bfloat16
 424 :    8388608 : layers.47.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 425 :    1048576 : layers.47.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 426 :    1048576 : layers.47.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 427 :    8388608 : layers.47.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 428 :   29360128 : layers.47.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 429 :   29360128 : layers.47.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 430 :   29360128 : layers.47.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 431 :       8192 : layers.47.attention_norm.weight     : [8192]          : torch.bfloat16
 432 :       8192 : layers.47.ffn_norm.weight           : [8192]          : torch.bfloat16
 433 :    8388608 : layers.48.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 434 :    1048576 : layers.48.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 435 :    1048576 : layers.48.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 436 :    8388608 : layers.48.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 437 :   29360128 : layers.48.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 438 :   29360128 : layers.48.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 439 :   29360128 : layers.48.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 440 :       8192 : layers.48.attention_norm.weight     : [8192]          : torch.bfloat16
 441 :       8192 : layers.48.ffn_norm.weight           : [8192]          : torch.bfloat16
 442 :    8388608 : layers.49.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 443 :    1048576 : layers.49.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 444 :    1048576 : layers.49.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 445 :    8388608 : layers.49.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 446 :   29360128 : layers.49.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 447 :   29360128 : layers.49.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 448 :   29360128 : layers.49.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 449 :       8192 : layers.49.attention_norm.weight     : [8192]          : torch.bfloat16
 450 :       8192 : layers.49.ffn_norm.weight           : [8192]          : torch.bfloat16
 451 :    8388608 : layers.50.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 452 :    1048576 : layers.50.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 453 :    1048576 : layers.50.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 454 :    8388608 : layers.50.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 455 :   29360128 : layers.50.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 456 :   29360128 : layers.50.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 457 :   29360128 : layers.50.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 458 :       8192 : layers.50.attention_norm.weight     : [8192]          : torch.bfloat16
 459 :       8192 : layers.50.ffn_norm.weight           : [8192]          : torch.bfloat16
 460 :    8388608 : layers.51.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 461 :    1048576 : layers.51.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 462 :    1048576 : layers.51.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 463 :    8388608 : layers.51.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 464 :   29360128 : layers.51.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 465 :   29360128 : layers.51.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 466 :   29360128 : layers.51.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 467 :       8192 : layers.51.attention_norm.weight     : [8192]          : torch.bfloat16
 468 :       8192 : layers.51.ffn_norm.weight           : [8192]          : torch.bfloat16
 469 :    8388608 : layers.52.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 470 :    1048576 : layers.52.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 471 :    1048576 : layers.52.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 472 :    8388608 : layers.52.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 473 :   29360128 : layers.52.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 474 :   29360128 : layers.52.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 475 :   29360128 : layers.52.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 476 :       8192 : layers.52.attention_norm.weight     : [8192]          : torch.bfloat16
 477 :       8192 : layers.52.ffn_norm.weight           : [8192]          : torch.bfloat16
 478 :    8388608 : layers.53.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 479 :    1048576 : layers.53.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 480 :    1048576 : layers.53.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 481 :    8388608 : layers.53.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 482 :   29360128 : layers.53.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 483 :   29360128 : layers.53.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 484 :   29360128 : layers.53.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 485 :       8192 : layers.53.attention_norm.weight     : [8192]          : torch.bfloat16
 486 :       8192 : layers.53.ffn_norm.weight           : [8192]          : torch.bfloat16
 487 :    8388608 : layers.54.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 488 :    1048576 : layers.54.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 489 :    1048576 : layers.54.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 490 :    8388608 : layers.54.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 491 :   29360128 : layers.54.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 492 :   29360128 : layers.54.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 493 :   29360128 : layers.54.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 494 :       8192 : layers.54.attention_norm.weight     : [8192]          : torch.bfloat16
 495 :       8192 : layers.54.ffn_norm.weight           : [8192]          : torch.bfloat16
 496 :    8388608 : layers.55.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 497 :    1048576 : layers.55.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 498 :    1048576 : layers.55.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 499 :    8388608 : layers.55.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 500 :   29360128 : layers.55.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 501 :   29360128 : layers.55.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 502 :   29360128 : layers.55.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 503 :       8192 : layers.55.attention_norm.weight     : [8192]          : torch.bfloat16
 504 :       8192 : layers.55.ffn_norm.weight           : [8192]          : torch.bfloat16
 505 :    8388608 : layers.56.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 506 :    1048576 : layers.56.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 507 :    1048576 : layers.56.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 508 :    8388608 : layers.56.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 509 :   29360128 : layers.56.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 510 :   29360128 : layers.56.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 511 :   29360128 : layers.56.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 512 :       8192 : layers.56.attention_norm.weight     : [8192]          : torch.bfloat16
 513 :       8192 : layers.56.ffn_norm.weight           : [8192]          : torch.bfloat16
 514 :    8388608 : layers.57.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 515 :    1048576 : layers.57.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 516 :    1048576 : layers.57.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 517 :    8388608 : layers.57.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 518 :   29360128 : layers.57.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 519 :   29360128 : layers.57.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 520 :   29360128 : layers.57.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 521 :       8192 : layers.57.attention_norm.weight     : [8192]          : torch.bfloat16
 522 :       8192 : layers.57.ffn_norm.weight           : [8192]          : torch.bfloat16
 523 :    8388608 : layers.58.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 524 :    1048576 : layers.58.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 525 :    1048576 : layers.58.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 526 :    8388608 : layers.58.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 527 :   29360128 : layers.58.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 528 :   29360128 : layers.58.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 529 :   29360128 : layers.58.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 530 :       8192 : layers.58.attention_norm.weight     : [8192]          : torch.bfloat16
 531 :       8192 : layers.58.ffn_norm.weight           : [8192]          : torch.bfloat16
 532 :    8388608 : layers.59.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 533 :    1048576 : layers.59.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 534 :    1048576 : layers.59.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 535 :    8388608 : layers.59.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 536 :   29360128 : layers.59.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 537 :   29360128 : layers.59.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 538 :   29360128 : layers.59.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 539 :       8192 : layers.59.attention_norm.weight     : [8192]          : torch.bfloat16
 540 :       8192 : layers.59.ffn_norm.weight           : [8192]          : torch.bfloat16
 541 :    8388608 : layers.60.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 542 :    1048576 : layers.60.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 543 :    1048576 : layers.60.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 544 :    8388608 : layers.60.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 545 :   29360128 : layers.60.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 546 :   29360128 : layers.60.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 547 :   29360128 : layers.60.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 548 :       8192 : layers.60.attention_norm.weight     : [8192]          : torch.bfloat16
 549 :       8192 : layers.60.ffn_norm.weight           : [8192]          : torch.bfloat16
 550 :    8388608 : layers.61.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 551 :    1048576 : layers.61.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 552 :    1048576 : layers.61.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 553 :    8388608 : layers.61.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 554 :   29360128 : layers.61.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 555 :   29360128 : layers.61.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 556 :   29360128 : layers.61.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 557 :       8192 : layers.61.attention_norm.weight     : [8192]          : torch.bfloat16
 558 :       8192 : layers.61.ffn_norm.weight           : [8192]          : torch.bfloat16
 559 :    8388608 : layers.62.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 560 :    1048576 : layers.62.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 561 :    1048576 : layers.62.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 562 :    8388608 : layers.62.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 563 :   29360128 : layers.62.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 564 :   29360128 : layers.62.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 565 :   29360128 : layers.62.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 566 :       8192 : layers.62.attention_norm.weight     : [8192]          : torch.bfloat16
 567 :       8192 : layers.62.ffn_norm.weight           : [8192]          : torch.bfloat16
 568 :    8388608 : layers.63.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 569 :    1048576 : layers.63.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 570 :    1048576 : layers.63.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 571 :    8388608 : layers.63.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 572 :   29360128 : layers.63.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 573 :   29360128 : layers.63.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 574 :   29360128 : layers.63.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 575 :       8192 : layers.63.attention_norm.weight     : [8192]          : torch.bfloat16
 576 :       8192 : layers.63.ffn_norm.weight           : [8192]          : torch.bfloat16
 577 :    8388608 : layers.64.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 578 :    1048576 : layers.64.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 579 :    1048576 : layers.64.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 580 :    8388608 : layers.64.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 581 :   29360128 : layers.64.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 582 :   29360128 : layers.64.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 583 :   29360128 : layers.64.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 584 :       8192 : layers.64.attention_norm.weight     : [8192]          : torch.bfloat16
 585 :       8192 : layers.64.ffn_norm.weight           : [8192]          : torch.bfloat16
 586 :    8388608 : layers.65.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 587 :    1048576 : layers.65.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 588 :    1048576 : layers.65.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 589 :    8388608 : layers.65.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 590 :   29360128 : layers.65.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 591 :   29360128 : layers.65.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 592 :   29360128 : layers.65.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 593 :       8192 : layers.65.attention_norm.weight     : [8192]          : torch.bfloat16
 594 :       8192 : layers.65.ffn_norm.weight           : [8192]          : torch.bfloat16
 595 :    8388608 : layers.66.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 596 :    1048576 : layers.66.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 597 :    1048576 : layers.66.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 598 :    8388608 : layers.66.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 599 :   29360128 : layers.66.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 600 :   29360128 : layers.66.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 601 :   29360128 : layers.66.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 602 :       8192 : layers.66.attention_norm.weight     : [8192]          : torch.bfloat16
 603 :       8192 : layers.66.ffn_norm.weight           : [8192]          : torch.bfloat16
 604 :    8388608 : layers.67.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 605 :    1048576 : layers.67.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 606 :    1048576 : layers.67.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 607 :    8388608 : layers.67.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 608 :   29360128 : layers.67.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 609 :   29360128 : layers.67.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 610 :   29360128 : layers.67.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 611 :       8192 : layers.67.attention_norm.weight     : [8192]          : torch.bfloat16
 612 :       8192 : layers.67.ffn_norm.weight           : [8192]          : torch.bfloat16
 613 :    8388608 : layers.68.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 614 :    1048576 : layers.68.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 615 :    1048576 : layers.68.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 616 :    8388608 : layers.68.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 617 :   29360128 : layers.68.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 618 :   29360128 : layers.68.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 619 :   29360128 : layers.68.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 620 :       8192 : layers.68.attention_norm.weight     : [8192]          : torch.bfloat16
 621 :       8192 : layers.68.ffn_norm.weight           : [8192]          : torch.bfloat16
 622 :    8388608 : layers.69.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 623 :    1048576 : layers.69.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 624 :    1048576 : layers.69.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 625 :    8388608 : layers.69.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 626 :   29360128 : layers.69.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 627 :   29360128 : layers.69.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 628 :   29360128 : layers.69.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 629 :       8192 : layers.69.attention_norm.weight     : [8192]          : torch.bfloat16
 630 :       8192 : layers.69.ffn_norm.weight           : [8192]          : torch.bfloat16
 631 :    8388608 : layers.70.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 632 :    1048576 : layers.70.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 633 :    1048576 : layers.70.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 634 :    8388608 : layers.70.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 635 :   29360128 : layers.70.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 636 :   29360128 : layers.70.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 637 :   29360128 : layers.70.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 638 :       8192 : layers.70.attention_norm.weight     : [8192]          : torch.bfloat16
 639 :       8192 : layers.70.ffn_norm.weight           : [8192]          : torch.bfloat16
 640 :    8388608 : layers.71.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 641 :    1048576 : layers.71.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 642 :    1048576 : layers.71.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 643 :    8388608 : layers.71.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 644 :   29360128 : layers.71.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 645 :   29360128 : layers.71.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 646 :   29360128 : layers.71.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 647 :       8192 : layers.71.attention_norm.weight     : [8192]          : torch.bfloat16
 648 :       8192 : layers.71.ffn_norm.weight           : [8192]          : torch.bfloat16
 649 :    8388608 : layers.72.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 650 :    1048576 : layers.72.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 651 :    1048576 : layers.72.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 652 :    8388608 : layers.72.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 653 :   29360128 : layers.72.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 654 :   29360128 : layers.72.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 655 :   29360128 : layers.72.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 656 :       8192 : layers.72.attention_norm.weight     : [8192]          : torch.bfloat16
 657 :       8192 : layers.72.ffn_norm.weight           : [8192]          : torch.bfloat16
 658 :    8388608 : layers.73.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 659 :    1048576 : layers.73.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 660 :    1048576 : layers.73.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 661 :    8388608 : layers.73.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 662 :   29360128 : layers.73.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 663 :   29360128 : layers.73.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 664 :   29360128 : layers.73.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 665 :       8192 : layers.73.attention_norm.weight     : [8192]          : torch.bfloat16
 666 :       8192 : layers.73.ffn_norm.weight           : [8192]          : torch.bfloat16
 667 :    8388608 : layers.74.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 668 :    1048576 : layers.74.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 669 :    1048576 : layers.74.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 670 :    8388608 : layers.74.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 671 :   29360128 : layers.74.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 672 :   29360128 : layers.74.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 673 :   29360128 : layers.74.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 674 :       8192 : layers.74.attention_norm.weight     : [8192]          : torch.bfloat16
 675 :       8192 : layers.74.ffn_norm.weight           : [8192]          : torch.bfloat16
 676 :    8388608 : layers.75.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 677 :    1048576 : layers.75.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 678 :    1048576 : layers.75.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 679 :    8388608 : layers.75.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 680 :   29360128 : layers.75.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 681 :   29360128 : layers.75.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 682 :   29360128 : layers.75.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 683 :       8192 : layers.75.attention_norm.weight     : [8192]          : torch.bfloat16
 684 :       8192 : layers.75.ffn_norm.weight           : [8192]          : torch.bfloat16
 685 :    8388608 : layers.76.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 686 :    1048576 : layers.76.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 687 :    1048576 : layers.76.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 688 :    8388608 : layers.76.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 689 :   29360128 : layers.76.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 690 :   29360128 : layers.76.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 691 :   29360128 : layers.76.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 692 :       8192 : layers.76.attention_norm.weight     : [8192]          : torch.bfloat16
 693 :       8192 : layers.76.ffn_norm.weight           : [8192]          : torch.bfloat16
 694 :    8388608 : layers.77.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 695 :    1048576 : layers.77.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 696 :    1048576 : layers.77.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 697 :    8388608 : layers.77.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 698 :   29360128 : layers.77.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 699 :   29360128 : layers.77.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 700 :   29360128 : layers.77.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 701 :       8192 : layers.77.attention_norm.weight     : [8192]          : torch.bfloat16
 702 :       8192 : layers.77.ffn_norm.weight           : [8192]          : torch.bfloat16
 703 :    8388608 : layers.78.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 704 :    1048576 : layers.78.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 705 :    1048576 : layers.78.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 706 :    8388608 : layers.78.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 707 :   29360128 : layers.78.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 708 :   29360128 : layers.78.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 709 :   29360128 : layers.78.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 710 :       8192 : layers.78.attention_norm.weight     : [8192]          : torch.bfloat16
 711 :       8192 : layers.78.ffn_norm.weight           : [8192]          : torch.bfloat16
 712 :    8388608 : layers.79.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 713 :    1048576 : layers.79.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 714 :    1048576 : layers.79.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 715 :    8388608 : layers.79.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 716 :   29360128 : layers.79.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 717 :   29360128 : layers.79.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 718 :   29360128 : layers.79.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 719 :       8192 : layers.79.attention_norm.weight     : [8192]          : torch.bfloat16
 720 :       8192 : layers.79.ffn_norm.weight           : [8192]          : torch.bfloat16
 721 :       8192 : norm.weight                         : [8192]          : torch.bfloat16
 722 :  131334144 : output.weight                       : [16032, 8192]   : torch.bfloat16

-----------------------------------------------------------------------------
35281469440 params in total.
70562938880 bytes in total.
24.61 sec, 6.07 sec, 2734.13 MB/s
-----------------------------------------------------------------------------

[5/8]: Loading original/consolidated.04.pth
   0 :  131334144 : tok_embeddings.weight               : [16032, 8192]   : torch.bfloat16
   1 :    8388608 : layers.0.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
   2 :    1048576 : layers.0.attention.wk.weight        : [128, 8192]     : torch.bfloat16
   3 :    1048576 : layers.0.attention.wv.weight        : [128, 8192]     : torch.bfloat16
   4 :    8388608 : layers.0.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
   5 :   29360128 : layers.0.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
   6 :   29360128 : layers.0.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
   7 :   29360128 : layers.0.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
   8 :       8192 : layers.0.attention_norm.weight      : [8192]          : torch.bfloat16
   9 :       8192 : layers.0.ffn_norm.weight            : [8192]          : torch.bfloat16
  10 :    8388608 : layers.1.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  11 :    1048576 : layers.1.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  12 :    1048576 : layers.1.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  13 :    8388608 : layers.1.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  14 :   29360128 : layers.1.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  15 :   29360128 : layers.1.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  16 :   29360128 : layers.1.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  17 :       8192 : layers.1.attention_norm.weight      : [8192]          : torch.bfloat16
  18 :       8192 : layers.1.ffn_norm.weight            : [8192]          : torch.bfloat16
  19 :    8388608 : layers.2.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  20 :    1048576 : layers.2.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  21 :    1048576 : layers.2.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  22 :    8388608 : layers.2.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  23 :   29360128 : layers.2.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  24 :   29360128 : layers.2.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  25 :   29360128 : layers.2.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  26 :       8192 : layers.2.attention_norm.weight      : [8192]          : torch.bfloat16
  27 :       8192 : layers.2.ffn_norm.weight            : [8192]          : torch.bfloat16
  28 :    8388608 : layers.3.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  29 :    1048576 : layers.3.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  30 :    1048576 : layers.3.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  31 :    8388608 : layers.3.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  32 :   29360128 : layers.3.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  33 :   29360128 : layers.3.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  34 :   29360128 : layers.3.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  35 :       8192 : layers.3.attention_norm.weight      : [8192]          : torch.bfloat16
  36 :       8192 : layers.3.ffn_norm.weight            : [8192]          : torch.bfloat16
  37 :    8388608 : layers.4.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  38 :    1048576 : layers.4.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  39 :    1048576 : layers.4.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  40 :    8388608 : layers.4.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  41 :   29360128 : layers.4.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  42 :   29360128 : layers.4.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  43 :   29360128 : layers.4.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  44 :       8192 : layers.4.attention_norm.weight      : [8192]          : torch.bfloat16
  45 :       8192 : layers.4.ffn_norm.weight            : [8192]          : torch.bfloat16
  46 :    8388608 : layers.5.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  47 :    1048576 : layers.5.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  48 :    1048576 : layers.5.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  49 :    8388608 : layers.5.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  50 :   29360128 : layers.5.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  51 :   29360128 : layers.5.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  52 :   29360128 : layers.5.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  53 :       8192 : layers.5.attention_norm.weight      : [8192]          : torch.bfloat16
  54 :       8192 : layers.5.ffn_norm.weight            : [8192]          : torch.bfloat16
  55 :    8388608 : layers.6.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  56 :    1048576 : layers.6.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  57 :    1048576 : layers.6.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  58 :    8388608 : layers.6.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  59 :   29360128 : layers.6.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  60 :   29360128 : layers.6.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  61 :   29360128 : layers.6.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  62 :       8192 : layers.6.attention_norm.weight      : [8192]          : torch.bfloat16
  63 :       8192 : layers.6.ffn_norm.weight            : [8192]          : torch.bfloat16
  64 :    8388608 : layers.7.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  65 :    1048576 : layers.7.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  66 :    1048576 : layers.7.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  67 :    8388608 : layers.7.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  68 :   29360128 : layers.7.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  69 :   29360128 : layers.7.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  70 :   29360128 : layers.7.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  71 :       8192 : layers.7.attention_norm.weight      : [8192]          : torch.bfloat16
  72 :       8192 : layers.7.ffn_norm.weight            : [8192]          : torch.bfloat16
  73 :    8388608 : layers.8.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  74 :    1048576 : layers.8.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  75 :    1048576 : layers.8.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  76 :    8388608 : layers.8.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  77 :   29360128 : layers.8.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  78 :   29360128 : layers.8.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  79 :   29360128 : layers.8.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  80 :       8192 : layers.8.attention_norm.weight      : [8192]          : torch.bfloat16
  81 :       8192 : layers.8.ffn_norm.weight            : [8192]          : torch.bfloat16
  82 :    8388608 : layers.9.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  83 :    1048576 : layers.9.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  84 :    1048576 : layers.9.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  85 :    8388608 : layers.9.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  86 :   29360128 : layers.9.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  87 :   29360128 : layers.9.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  88 :   29360128 : layers.9.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  89 :       8192 : layers.9.attention_norm.weight      : [8192]          : torch.bfloat16
  90 :       8192 : layers.9.ffn_norm.weight            : [8192]          : torch.bfloat16
  91 :    8388608 : layers.10.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
  92 :    1048576 : layers.10.attention.wk.weight       : [128, 8192]     : torch.bfloat16
  93 :    1048576 : layers.10.attention.wv.weight       : [128, 8192]     : torch.bfloat16
  94 :    8388608 : layers.10.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
  95 :   29360128 : layers.10.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
  96 :   29360128 : layers.10.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
  97 :   29360128 : layers.10.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
  98 :       8192 : layers.10.attention_norm.weight     : [8192]          : torch.bfloat16
  99 :       8192 : layers.10.ffn_norm.weight           : [8192]          : torch.bfloat16
 100 :    8388608 : layers.11.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 101 :    1048576 : layers.11.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 102 :    1048576 : layers.11.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 103 :    8388608 : layers.11.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 104 :   29360128 : layers.11.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 105 :   29360128 : layers.11.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 106 :   29360128 : layers.11.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 107 :       8192 : layers.11.attention_norm.weight     : [8192]          : torch.bfloat16
 108 :       8192 : layers.11.ffn_norm.weight           : [8192]          : torch.bfloat16
 109 :    8388608 : layers.12.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 110 :    1048576 : layers.12.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 111 :    1048576 : layers.12.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 112 :    8388608 : layers.12.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 113 :   29360128 : layers.12.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 114 :   29360128 : layers.12.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 115 :   29360128 : layers.12.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 116 :       8192 : layers.12.attention_norm.weight     : [8192]          : torch.bfloat16
 117 :       8192 : layers.12.ffn_norm.weight           : [8192]          : torch.bfloat16
 118 :    8388608 : layers.13.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 119 :    1048576 : layers.13.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 120 :    1048576 : layers.13.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 121 :    8388608 : layers.13.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 122 :   29360128 : layers.13.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 123 :   29360128 : layers.13.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 124 :   29360128 : layers.13.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 125 :       8192 : layers.13.attention_norm.weight     : [8192]          : torch.bfloat16
 126 :       8192 : layers.13.ffn_norm.weight           : [8192]          : torch.bfloat16
 127 :    8388608 : layers.14.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 128 :    1048576 : layers.14.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 129 :    1048576 : layers.14.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 130 :    8388608 : layers.14.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 131 :   29360128 : layers.14.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 132 :   29360128 : layers.14.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 133 :   29360128 : layers.14.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 134 :       8192 : layers.14.attention_norm.weight     : [8192]          : torch.bfloat16
 135 :       8192 : layers.14.ffn_norm.weight           : [8192]          : torch.bfloat16
 136 :    8388608 : layers.15.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 137 :    1048576 : layers.15.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 138 :    1048576 : layers.15.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 139 :    8388608 : layers.15.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 140 :   29360128 : layers.15.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 141 :   29360128 : layers.15.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 142 :   29360128 : layers.15.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 143 :       8192 : layers.15.attention_norm.weight     : [8192]          : torch.bfloat16
 144 :       8192 : layers.15.ffn_norm.weight           : [8192]          : torch.bfloat16
 145 :    8388608 : layers.16.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 146 :    1048576 : layers.16.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 147 :    1048576 : layers.16.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 148 :    8388608 : layers.16.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 149 :   29360128 : layers.16.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 150 :   29360128 : layers.16.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 151 :   29360128 : layers.16.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 152 :       8192 : layers.16.attention_norm.weight     : [8192]          : torch.bfloat16
 153 :       8192 : layers.16.ffn_norm.weight           : [8192]          : torch.bfloat16
 154 :    8388608 : layers.17.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 155 :    1048576 : layers.17.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 156 :    1048576 : layers.17.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 157 :    8388608 : layers.17.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 158 :   29360128 : layers.17.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 159 :   29360128 : layers.17.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 160 :   29360128 : layers.17.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 161 :       8192 : layers.17.attention_norm.weight     : [8192]          : torch.bfloat16
 162 :       8192 : layers.17.ffn_norm.weight           : [8192]          : torch.bfloat16
 163 :    8388608 : layers.18.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 164 :    1048576 : layers.18.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 165 :    1048576 : layers.18.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 166 :    8388608 : layers.18.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 167 :   29360128 : layers.18.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 168 :   29360128 : layers.18.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 169 :   29360128 : layers.18.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 170 :       8192 : layers.18.attention_norm.weight     : [8192]          : torch.bfloat16
 171 :       8192 : layers.18.ffn_norm.weight           : [8192]          : torch.bfloat16
 172 :    8388608 : layers.19.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 173 :    1048576 : layers.19.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 174 :    1048576 : layers.19.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 175 :    8388608 : layers.19.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 176 :   29360128 : layers.19.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 177 :   29360128 : layers.19.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 178 :   29360128 : layers.19.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 179 :       8192 : layers.19.attention_norm.weight     : [8192]          : torch.bfloat16
 180 :       8192 : layers.19.ffn_norm.weight           : [8192]          : torch.bfloat16
 181 :    8388608 : layers.20.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 182 :    1048576 : layers.20.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 183 :    1048576 : layers.20.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 184 :    8388608 : layers.20.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 185 :   29360128 : layers.20.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 186 :   29360128 : layers.20.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 187 :   29360128 : layers.20.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 188 :       8192 : layers.20.attention_norm.weight     : [8192]          : torch.bfloat16
 189 :       8192 : layers.20.ffn_norm.weight           : [8192]          : torch.bfloat16
 190 :    8388608 : layers.21.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 191 :    1048576 : layers.21.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 192 :    1048576 : layers.21.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 193 :    8388608 : layers.21.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 194 :   29360128 : layers.21.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 195 :   29360128 : layers.21.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 196 :   29360128 : layers.21.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 197 :       8192 : layers.21.attention_norm.weight     : [8192]          : torch.bfloat16
 198 :       8192 : layers.21.ffn_norm.weight           : [8192]          : torch.bfloat16
 199 :    8388608 : layers.22.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 200 :    1048576 : layers.22.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 201 :    1048576 : layers.22.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 202 :    8388608 : layers.22.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 203 :   29360128 : layers.22.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 204 :   29360128 : layers.22.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 205 :   29360128 : layers.22.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 206 :       8192 : layers.22.attention_norm.weight     : [8192]          : torch.bfloat16
 207 :       8192 : layers.22.ffn_norm.weight           : [8192]          : torch.bfloat16
 208 :    8388608 : layers.23.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 209 :    1048576 : layers.23.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 210 :    1048576 : layers.23.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 211 :    8388608 : layers.23.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 212 :   29360128 : layers.23.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 213 :   29360128 : layers.23.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 214 :   29360128 : layers.23.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 215 :       8192 : layers.23.attention_norm.weight     : [8192]          : torch.bfloat16
 216 :       8192 : layers.23.ffn_norm.weight           : [8192]          : torch.bfloat16
 217 :    8388608 : layers.24.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 218 :    1048576 : layers.24.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 219 :    1048576 : layers.24.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 220 :    8388608 : layers.24.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 221 :   29360128 : layers.24.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 222 :   29360128 : layers.24.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 223 :   29360128 : layers.24.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 224 :       8192 : layers.24.attention_norm.weight     : [8192]          : torch.bfloat16
 225 :       8192 : layers.24.ffn_norm.weight           : [8192]          : torch.bfloat16
 226 :    8388608 : layers.25.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 227 :    1048576 : layers.25.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 228 :    1048576 : layers.25.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 229 :    8388608 : layers.25.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 230 :   29360128 : layers.25.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 231 :   29360128 : layers.25.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 232 :   29360128 : layers.25.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 233 :       8192 : layers.25.attention_norm.weight     : [8192]          : torch.bfloat16
 234 :       8192 : layers.25.ffn_norm.weight           : [8192]          : torch.bfloat16
 235 :    8388608 : layers.26.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 236 :    1048576 : layers.26.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 237 :    1048576 : layers.26.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 238 :    8388608 : layers.26.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 239 :   29360128 : layers.26.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 240 :   29360128 : layers.26.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 241 :   29360128 : layers.26.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 242 :       8192 : layers.26.attention_norm.weight     : [8192]          : torch.bfloat16
 243 :       8192 : layers.26.ffn_norm.weight           : [8192]          : torch.bfloat16
 244 :    8388608 : layers.27.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 245 :    1048576 : layers.27.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 246 :    1048576 : layers.27.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 247 :    8388608 : layers.27.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 248 :   29360128 : layers.27.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 249 :   29360128 : layers.27.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 250 :   29360128 : layers.27.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 251 :       8192 : layers.27.attention_norm.weight     : [8192]          : torch.bfloat16
 252 :       8192 : layers.27.ffn_norm.weight           : [8192]          : torch.bfloat16
 253 :    8388608 : layers.28.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 254 :    1048576 : layers.28.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 255 :    1048576 : layers.28.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 256 :    8388608 : layers.28.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 257 :   29360128 : layers.28.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 258 :   29360128 : layers.28.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 259 :   29360128 : layers.28.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 260 :       8192 : layers.28.attention_norm.weight     : [8192]          : torch.bfloat16
 261 :       8192 : layers.28.ffn_norm.weight           : [8192]          : torch.bfloat16
 262 :    8388608 : layers.29.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 263 :    1048576 : layers.29.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 264 :    1048576 : layers.29.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 265 :    8388608 : layers.29.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 266 :   29360128 : layers.29.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 267 :   29360128 : layers.29.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 268 :   29360128 : layers.29.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 269 :       8192 : layers.29.attention_norm.weight     : [8192]          : torch.bfloat16
 270 :       8192 : layers.29.ffn_norm.weight           : [8192]          : torch.bfloat16
 271 :    8388608 : layers.30.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 272 :    1048576 : layers.30.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 273 :    1048576 : layers.30.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 274 :    8388608 : layers.30.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 275 :   29360128 : layers.30.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 276 :   29360128 : layers.30.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 277 :   29360128 : layers.30.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 278 :       8192 : layers.30.attention_norm.weight     : [8192]          : torch.bfloat16
 279 :       8192 : layers.30.ffn_norm.weight           : [8192]          : torch.bfloat16
 280 :    8388608 : layers.31.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 281 :    1048576 : layers.31.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 282 :    1048576 : layers.31.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 283 :    8388608 : layers.31.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 284 :   29360128 : layers.31.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 285 :   29360128 : layers.31.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 286 :   29360128 : layers.31.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 287 :       8192 : layers.31.attention_norm.weight     : [8192]          : torch.bfloat16
 288 :       8192 : layers.31.ffn_norm.weight           : [8192]          : torch.bfloat16
 289 :    8388608 : layers.32.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 290 :    1048576 : layers.32.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 291 :    1048576 : layers.32.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 292 :    8388608 : layers.32.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 293 :   29360128 : layers.32.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 294 :   29360128 : layers.32.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 295 :   29360128 : layers.32.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 296 :       8192 : layers.32.attention_norm.weight     : [8192]          : torch.bfloat16
 297 :       8192 : layers.32.ffn_norm.weight           : [8192]          : torch.bfloat16
 298 :    8388608 : layers.33.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 299 :    1048576 : layers.33.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 300 :    1048576 : layers.33.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 301 :    8388608 : layers.33.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 302 :   29360128 : layers.33.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 303 :   29360128 : layers.33.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 304 :   29360128 : layers.33.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 305 :       8192 : layers.33.attention_norm.weight     : [8192]          : torch.bfloat16
 306 :       8192 : layers.33.ffn_norm.weight           : [8192]          : torch.bfloat16
 307 :    8388608 : layers.34.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 308 :    1048576 : layers.34.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 309 :    1048576 : layers.34.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 310 :    8388608 : layers.34.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 311 :   29360128 : layers.34.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 312 :   29360128 : layers.34.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 313 :   29360128 : layers.34.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 314 :       8192 : layers.34.attention_norm.weight     : [8192]          : torch.bfloat16
 315 :       8192 : layers.34.ffn_norm.weight           : [8192]          : torch.bfloat16
 316 :    8388608 : layers.35.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 317 :    1048576 : layers.35.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 318 :    1048576 : layers.35.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 319 :    8388608 : layers.35.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 320 :   29360128 : layers.35.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 321 :   29360128 : layers.35.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 322 :   29360128 : layers.35.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 323 :       8192 : layers.35.attention_norm.weight     : [8192]          : torch.bfloat16
 324 :       8192 : layers.35.ffn_norm.weight           : [8192]          : torch.bfloat16
 325 :    8388608 : layers.36.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 326 :    1048576 : layers.36.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 327 :    1048576 : layers.36.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 328 :    8388608 : layers.36.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 329 :   29360128 : layers.36.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 330 :   29360128 : layers.36.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 331 :   29360128 : layers.36.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 332 :       8192 : layers.36.attention_norm.weight     : [8192]          : torch.bfloat16
 333 :       8192 : layers.36.ffn_norm.weight           : [8192]          : torch.bfloat16
 334 :    8388608 : layers.37.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 335 :    1048576 : layers.37.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 336 :    1048576 : layers.37.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 337 :    8388608 : layers.37.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 338 :   29360128 : layers.37.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 339 :   29360128 : layers.37.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 340 :   29360128 : layers.37.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 341 :       8192 : layers.37.attention_norm.weight     : [8192]          : torch.bfloat16
 342 :       8192 : layers.37.ffn_norm.weight           : [8192]          : torch.bfloat16
 343 :    8388608 : layers.38.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 344 :    1048576 : layers.38.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 345 :    1048576 : layers.38.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 346 :    8388608 : layers.38.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 347 :   29360128 : layers.38.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 348 :   29360128 : layers.38.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 349 :   29360128 : layers.38.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 350 :       8192 : layers.38.attention_norm.weight     : [8192]          : torch.bfloat16
 351 :       8192 : layers.38.ffn_norm.weight           : [8192]          : torch.bfloat16
 352 :    8388608 : layers.39.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 353 :    1048576 : layers.39.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 354 :    1048576 : layers.39.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 355 :    8388608 : layers.39.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 356 :   29360128 : layers.39.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 357 :   29360128 : layers.39.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 358 :   29360128 : layers.39.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 359 :       8192 : layers.39.attention_norm.weight     : [8192]          : torch.bfloat16
 360 :       8192 : layers.39.ffn_norm.weight           : [8192]          : torch.bfloat16
 361 :    8388608 : layers.40.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 362 :    1048576 : layers.40.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 363 :    1048576 : layers.40.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 364 :    8388608 : layers.40.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 365 :   29360128 : layers.40.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 366 :   29360128 : layers.40.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 367 :   29360128 : layers.40.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 368 :       8192 : layers.40.attention_norm.weight     : [8192]          : torch.bfloat16
 369 :       8192 : layers.40.ffn_norm.weight           : [8192]          : torch.bfloat16
 370 :    8388608 : layers.41.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 371 :    1048576 : layers.41.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 372 :    1048576 : layers.41.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 373 :    8388608 : layers.41.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 374 :   29360128 : layers.41.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 375 :   29360128 : layers.41.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 376 :   29360128 : layers.41.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 377 :       8192 : layers.41.attention_norm.weight     : [8192]          : torch.bfloat16
 378 :       8192 : layers.41.ffn_norm.weight           : [8192]          : torch.bfloat16
 379 :    8388608 : layers.42.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 380 :    1048576 : layers.42.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 381 :    1048576 : layers.42.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 382 :    8388608 : layers.42.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 383 :   29360128 : layers.42.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 384 :   29360128 : layers.42.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 385 :   29360128 : layers.42.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 386 :       8192 : layers.42.attention_norm.weight     : [8192]          : torch.bfloat16
 387 :       8192 : layers.42.ffn_norm.weight           : [8192]          : torch.bfloat16
 388 :    8388608 : layers.43.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 389 :    1048576 : layers.43.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 390 :    1048576 : layers.43.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 391 :    8388608 : layers.43.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 392 :   29360128 : layers.43.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 393 :   29360128 : layers.43.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 394 :   29360128 : layers.43.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 395 :       8192 : layers.43.attention_norm.weight     : [8192]          : torch.bfloat16
 396 :       8192 : layers.43.ffn_norm.weight           : [8192]          : torch.bfloat16
 397 :    8388608 : layers.44.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 398 :    1048576 : layers.44.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 399 :    1048576 : layers.44.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 400 :    8388608 : layers.44.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 401 :   29360128 : layers.44.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 402 :   29360128 : layers.44.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 403 :   29360128 : layers.44.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 404 :       8192 : layers.44.attention_norm.weight     : [8192]          : torch.bfloat16
 405 :       8192 : layers.44.ffn_norm.weight           : [8192]          : torch.bfloat16
 406 :    8388608 : layers.45.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 407 :    1048576 : layers.45.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 408 :    1048576 : layers.45.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 409 :    8388608 : layers.45.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 410 :   29360128 : layers.45.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 411 :   29360128 : layers.45.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 412 :   29360128 : layers.45.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 413 :       8192 : layers.45.attention_norm.weight     : [8192]          : torch.bfloat16
 414 :       8192 : layers.45.ffn_norm.weight           : [8192]          : torch.bfloat16
 415 :    8388608 : layers.46.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 416 :    1048576 : layers.46.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 417 :    1048576 : layers.46.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 418 :    8388608 : layers.46.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 419 :   29360128 : layers.46.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 420 :   29360128 : layers.46.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 421 :   29360128 : layers.46.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 422 :       8192 : layers.46.attention_norm.weight     : [8192]          : torch.bfloat16
 423 :       8192 : layers.46.ffn_norm.weight           : [8192]          : torch.bfloat16
 424 :    8388608 : layers.47.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 425 :    1048576 : layers.47.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 426 :    1048576 : layers.47.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 427 :    8388608 : layers.47.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 428 :   29360128 : layers.47.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 429 :   29360128 : layers.47.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 430 :   29360128 : layers.47.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 431 :       8192 : layers.47.attention_norm.weight     : [8192]          : torch.bfloat16
 432 :       8192 : layers.47.ffn_norm.weight           : [8192]          : torch.bfloat16
 433 :    8388608 : layers.48.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 434 :    1048576 : layers.48.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 435 :    1048576 : layers.48.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 436 :    8388608 : layers.48.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 437 :   29360128 : layers.48.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 438 :   29360128 : layers.48.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 439 :   29360128 : layers.48.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 440 :       8192 : layers.48.attention_norm.weight     : [8192]          : torch.bfloat16
 441 :       8192 : layers.48.ffn_norm.weight           : [8192]          : torch.bfloat16
 442 :    8388608 : layers.49.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 443 :    1048576 : layers.49.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 444 :    1048576 : layers.49.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 445 :    8388608 : layers.49.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 446 :   29360128 : layers.49.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 447 :   29360128 : layers.49.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 448 :   29360128 : layers.49.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 449 :       8192 : layers.49.attention_norm.weight     : [8192]          : torch.bfloat16
 450 :       8192 : layers.49.ffn_norm.weight           : [8192]          : torch.bfloat16
 451 :    8388608 : layers.50.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 452 :    1048576 : layers.50.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 453 :    1048576 : layers.50.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 454 :    8388608 : layers.50.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 455 :   29360128 : layers.50.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 456 :   29360128 : layers.50.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 457 :   29360128 : layers.50.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 458 :       8192 : layers.50.attention_norm.weight     : [8192]          : torch.bfloat16
 459 :       8192 : layers.50.ffn_norm.weight           : [8192]          : torch.bfloat16
 460 :    8388608 : layers.51.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 461 :    1048576 : layers.51.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 462 :    1048576 : layers.51.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 463 :    8388608 : layers.51.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 464 :   29360128 : layers.51.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 465 :   29360128 : layers.51.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 466 :   29360128 : layers.51.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 467 :       8192 : layers.51.attention_norm.weight     : [8192]          : torch.bfloat16
 468 :       8192 : layers.51.ffn_norm.weight           : [8192]          : torch.bfloat16
 469 :    8388608 : layers.52.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 470 :    1048576 : layers.52.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 471 :    1048576 : layers.52.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 472 :    8388608 : layers.52.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 473 :   29360128 : layers.52.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 474 :   29360128 : layers.52.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 475 :   29360128 : layers.52.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 476 :       8192 : layers.52.attention_norm.weight     : [8192]          : torch.bfloat16
 477 :       8192 : layers.52.ffn_norm.weight           : [8192]          : torch.bfloat16
 478 :    8388608 : layers.53.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 479 :    1048576 : layers.53.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 480 :    1048576 : layers.53.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 481 :    8388608 : layers.53.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 482 :   29360128 : layers.53.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 483 :   29360128 : layers.53.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 484 :   29360128 : layers.53.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 485 :       8192 : layers.53.attention_norm.weight     : [8192]          : torch.bfloat16
 486 :       8192 : layers.53.ffn_norm.weight           : [8192]          : torch.bfloat16
 487 :    8388608 : layers.54.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 488 :    1048576 : layers.54.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 489 :    1048576 : layers.54.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 490 :    8388608 : layers.54.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 491 :   29360128 : layers.54.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 492 :   29360128 : layers.54.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 493 :   29360128 : layers.54.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 494 :       8192 : layers.54.attention_norm.weight     : [8192]          : torch.bfloat16
 495 :       8192 : layers.54.ffn_norm.weight           : [8192]          : torch.bfloat16
 496 :    8388608 : layers.55.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 497 :    1048576 : layers.55.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 498 :    1048576 : layers.55.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 499 :    8388608 : layers.55.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 500 :   29360128 : layers.55.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 501 :   29360128 : layers.55.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 502 :   29360128 : layers.55.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 503 :       8192 : layers.55.attention_norm.weight     : [8192]          : torch.bfloat16
 504 :       8192 : layers.55.ffn_norm.weight           : [8192]          : torch.bfloat16
 505 :    8388608 : layers.56.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 506 :    1048576 : layers.56.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 507 :    1048576 : layers.56.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 508 :    8388608 : layers.56.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 509 :   29360128 : layers.56.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 510 :   29360128 : layers.56.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 511 :   29360128 : layers.56.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 512 :       8192 : layers.56.attention_norm.weight     : [8192]          : torch.bfloat16
 513 :       8192 : layers.56.ffn_norm.weight           : [8192]          : torch.bfloat16
 514 :    8388608 : layers.57.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 515 :    1048576 : layers.57.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 516 :    1048576 : layers.57.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 517 :    8388608 : layers.57.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 518 :   29360128 : layers.57.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 519 :   29360128 : layers.57.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 520 :   29360128 : layers.57.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 521 :       8192 : layers.57.attention_norm.weight     : [8192]          : torch.bfloat16
 522 :       8192 : layers.57.ffn_norm.weight           : [8192]          : torch.bfloat16
 523 :    8388608 : layers.58.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 524 :    1048576 : layers.58.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 525 :    1048576 : layers.58.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 526 :    8388608 : layers.58.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 527 :   29360128 : layers.58.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 528 :   29360128 : layers.58.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 529 :   29360128 : layers.58.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 530 :       8192 : layers.58.attention_norm.weight     : [8192]          : torch.bfloat16
 531 :       8192 : layers.58.ffn_norm.weight           : [8192]          : torch.bfloat16
 532 :    8388608 : layers.59.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 533 :    1048576 : layers.59.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 534 :    1048576 : layers.59.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 535 :    8388608 : layers.59.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 536 :   29360128 : layers.59.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 537 :   29360128 : layers.59.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 538 :   29360128 : layers.59.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 539 :       8192 : layers.59.attention_norm.weight     : [8192]          : torch.bfloat16
 540 :       8192 : layers.59.ffn_norm.weight           : [8192]          : torch.bfloat16
 541 :    8388608 : layers.60.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 542 :    1048576 : layers.60.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 543 :    1048576 : layers.60.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 544 :    8388608 : layers.60.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 545 :   29360128 : layers.60.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 546 :   29360128 : layers.60.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 547 :   29360128 : layers.60.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 548 :       8192 : layers.60.attention_norm.weight     : [8192]          : torch.bfloat16
 549 :       8192 : layers.60.ffn_norm.weight           : [8192]          : torch.bfloat16
 550 :    8388608 : layers.61.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 551 :    1048576 : layers.61.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 552 :    1048576 : layers.61.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 553 :    8388608 : layers.61.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 554 :   29360128 : layers.61.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 555 :   29360128 : layers.61.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 556 :   29360128 : layers.61.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 557 :       8192 : layers.61.attention_norm.weight     : [8192]          : torch.bfloat16
 558 :       8192 : layers.61.ffn_norm.weight           : [8192]          : torch.bfloat16
 559 :    8388608 : layers.62.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 560 :    1048576 : layers.62.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 561 :    1048576 : layers.62.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 562 :    8388608 : layers.62.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 563 :   29360128 : layers.62.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 564 :   29360128 : layers.62.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 565 :   29360128 : layers.62.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 566 :       8192 : layers.62.attention_norm.weight     : [8192]          : torch.bfloat16
 567 :       8192 : layers.62.ffn_norm.weight           : [8192]          : torch.bfloat16
 568 :    8388608 : layers.63.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 569 :    1048576 : layers.63.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 570 :    1048576 : layers.63.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 571 :    8388608 : layers.63.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 572 :   29360128 : layers.63.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 573 :   29360128 : layers.63.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 574 :   29360128 : layers.63.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 575 :       8192 : layers.63.attention_norm.weight     : [8192]          : torch.bfloat16
 576 :       8192 : layers.63.ffn_norm.weight           : [8192]          : torch.bfloat16
 577 :    8388608 : layers.64.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 578 :    1048576 : layers.64.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 579 :    1048576 : layers.64.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 580 :    8388608 : layers.64.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 581 :   29360128 : layers.64.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 582 :   29360128 : layers.64.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 583 :   29360128 : layers.64.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 584 :       8192 : layers.64.attention_norm.weight     : [8192]          : torch.bfloat16
 585 :       8192 : layers.64.ffn_norm.weight           : [8192]          : torch.bfloat16
 586 :    8388608 : layers.65.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 587 :    1048576 : layers.65.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 588 :    1048576 : layers.65.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 589 :    8388608 : layers.65.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 590 :   29360128 : layers.65.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 591 :   29360128 : layers.65.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 592 :   29360128 : layers.65.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 593 :       8192 : layers.65.attention_norm.weight     : [8192]          : torch.bfloat16
 594 :       8192 : layers.65.ffn_norm.weight           : [8192]          : torch.bfloat16
 595 :    8388608 : layers.66.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 596 :    1048576 : layers.66.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 597 :    1048576 : layers.66.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 598 :    8388608 : layers.66.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 599 :   29360128 : layers.66.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 600 :   29360128 : layers.66.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 601 :   29360128 : layers.66.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 602 :       8192 : layers.66.attention_norm.weight     : [8192]          : torch.bfloat16
 603 :       8192 : layers.66.ffn_norm.weight           : [8192]          : torch.bfloat16
 604 :    8388608 : layers.67.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 605 :    1048576 : layers.67.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 606 :    1048576 : layers.67.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 607 :    8388608 : layers.67.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 608 :   29360128 : layers.67.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 609 :   29360128 : layers.67.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 610 :   29360128 : layers.67.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 611 :       8192 : layers.67.attention_norm.weight     : [8192]          : torch.bfloat16
 612 :       8192 : layers.67.ffn_norm.weight           : [8192]          : torch.bfloat16
 613 :    8388608 : layers.68.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 614 :    1048576 : layers.68.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 615 :    1048576 : layers.68.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 616 :    8388608 : layers.68.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 617 :   29360128 : layers.68.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 618 :   29360128 : layers.68.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 619 :   29360128 : layers.68.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 620 :       8192 : layers.68.attention_norm.weight     : [8192]          : torch.bfloat16
 621 :       8192 : layers.68.ffn_norm.weight           : [8192]          : torch.bfloat16
 622 :    8388608 : layers.69.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 623 :    1048576 : layers.69.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 624 :    1048576 : layers.69.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 625 :    8388608 : layers.69.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 626 :   29360128 : layers.69.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 627 :   29360128 : layers.69.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 628 :   29360128 : layers.69.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 629 :       8192 : layers.69.attention_norm.weight     : [8192]          : torch.bfloat16
 630 :       8192 : layers.69.ffn_norm.weight           : [8192]          : torch.bfloat16
 631 :    8388608 : layers.70.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 632 :    1048576 : layers.70.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 633 :    1048576 : layers.70.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 634 :    8388608 : layers.70.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 635 :   29360128 : layers.70.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 636 :   29360128 : layers.70.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 637 :   29360128 : layers.70.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 638 :       8192 : layers.70.attention_norm.weight     : [8192]          : torch.bfloat16
 639 :       8192 : layers.70.ffn_norm.weight           : [8192]          : torch.bfloat16
 640 :    8388608 : layers.71.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 641 :    1048576 : layers.71.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 642 :    1048576 : layers.71.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 643 :    8388608 : layers.71.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 644 :   29360128 : layers.71.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 645 :   29360128 : layers.71.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 646 :   29360128 : layers.71.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 647 :       8192 : layers.71.attention_norm.weight     : [8192]          : torch.bfloat16
 648 :       8192 : layers.71.ffn_norm.weight           : [8192]          : torch.bfloat16
 649 :    8388608 : layers.72.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 650 :    1048576 : layers.72.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 651 :    1048576 : layers.72.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 652 :    8388608 : layers.72.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 653 :   29360128 : layers.72.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 654 :   29360128 : layers.72.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 655 :   29360128 : layers.72.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 656 :       8192 : layers.72.attention_norm.weight     : [8192]          : torch.bfloat16
 657 :       8192 : layers.72.ffn_norm.weight           : [8192]          : torch.bfloat16
 658 :    8388608 : layers.73.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 659 :    1048576 : layers.73.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 660 :    1048576 : layers.73.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 661 :    8388608 : layers.73.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 662 :   29360128 : layers.73.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 663 :   29360128 : layers.73.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 664 :   29360128 : layers.73.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 665 :       8192 : layers.73.attention_norm.weight     : [8192]          : torch.bfloat16
 666 :       8192 : layers.73.ffn_norm.weight           : [8192]          : torch.bfloat16
 667 :    8388608 : layers.74.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 668 :    1048576 : layers.74.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 669 :    1048576 : layers.74.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 670 :    8388608 : layers.74.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 671 :   29360128 : layers.74.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 672 :   29360128 : layers.74.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 673 :   29360128 : layers.74.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 674 :       8192 : layers.74.attention_norm.weight     : [8192]          : torch.bfloat16
 675 :       8192 : layers.74.ffn_norm.weight           : [8192]          : torch.bfloat16
 676 :    8388608 : layers.75.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 677 :    1048576 : layers.75.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 678 :    1048576 : layers.75.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 679 :    8388608 : layers.75.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 680 :   29360128 : layers.75.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 681 :   29360128 : layers.75.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 682 :   29360128 : layers.75.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 683 :       8192 : layers.75.attention_norm.weight     : [8192]          : torch.bfloat16
 684 :       8192 : layers.75.ffn_norm.weight           : [8192]          : torch.bfloat16
 685 :    8388608 : layers.76.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 686 :    1048576 : layers.76.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 687 :    1048576 : layers.76.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 688 :    8388608 : layers.76.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 689 :   29360128 : layers.76.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 690 :   29360128 : layers.76.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 691 :   29360128 : layers.76.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 692 :       8192 : layers.76.attention_norm.weight     : [8192]          : torch.bfloat16
 693 :       8192 : layers.76.ffn_norm.weight           : [8192]          : torch.bfloat16
 694 :    8388608 : layers.77.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 695 :    1048576 : layers.77.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 696 :    1048576 : layers.77.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 697 :    8388608 : layers.77.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 698 :   29360128 : layers.77.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 699 :   29360128 : layers.77.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 700 :   29360128 : layers.77.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 701 :       8192 : layers.77.attention_norm.weight     : [8192]          : torch.bfloat16
 702 :       8192 : layers.77.ffn_norm.weight           : [8192]          : torch.bfloat16
 703 :    8388608 : layers.78.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 704 :    1048576 : layers.78.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 705 :    1048576 : layers.78.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 706 :    8388608 : layers.78.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 707 :   29360128 : layers.78.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 708 :   29360128 : layers.78.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 709 :   29360128 : layers.78.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 710 :       8192 : layers.78.attention_norm.weight     : [8192]          : torch.bfloat16
 711 :       8192 : layers.78.ffn_norm.weight           : [8192]          : torch.bfloat16
 712 :    8388608 : layers.79.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 713 :    1048576 : layers.79.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 714 :    1048576 : layers.79.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 715 :    8388608 : layers.79.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 716 :   29360128 : layers.79.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 717 :   29360128 : layers.79.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 718 :   29360128 : layers.79.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 719 :       8192 : layers.79.attention_norm.weight     : [8192]          : torch.bfloat16
 720 :       8192 : layers.79.ffn_norm.weight           : [8192]          : torch.bfloat16
 721 :       8192 : norm.weight                         : [8192]          : torch.bfloat16
 722 :  131334144 : output.weight                       : [16032, 8192]   : torch.bfloat16

-----------------------------------------------------------------------------
44101836800 params in total.
88203673600 bytes in total.
30.09 sec, 5.48 sec, 2795.71 MB/s
-----------------------------------------------------------------------------

[6/8]: Loading original/consolidated.05.pth
   0 :  131334144 : tok_embeddings.weight               : [16032, 8192]   : torch.bfloat16
   1 :    8388608 : layers.0.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
   2 :    1048576 : layers.0.attention.wk.weight        : [128, 8192]     : torch.bfloat16
   3 :    1048576 : layers.0.attention.wv.weight        : [128, 8192]     : torch.bfloat16
   4 :    8388608 : layers.0.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
   5 :   29360128 : layers.0.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
   6 :   29360128 : layers.0.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
   7 :   29360128 : layers.0.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
   8 :       8192 : layers.0.attention_norm.weight      : [8192]          : torch.bfloat16
   9 :       8192 : layers.0.ffn_norm.weight            : [8192]          : torch.bfloat16
  10 :    8388608 : layers.1.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  11 :    1048576 : layers.1.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  12 :    1048576 : layers.1.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  13 :    8388608 : layers.1.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  14 :   29360128 : layers.1.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  15 :   29360128 : layers.1.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  16 :   29360128 : layers.1.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  17 :       8192 : layers.1.attention_norm.weight      : [8192]          : torch.bfloat16
  18 :       8192 : layers.1.ffn_norm.weight            : [8192]          : torch.bfloat16
  19 :    8388608 : layers.2.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  20 :    1048576 : layers.2.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  21 :    1048576 : layers.2.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  22 :    8388608 : layers.2.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  23 :   29360128 : layers.2.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  24 :   29360128 : layers.2.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  25 :   29360128 : layers.2.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  26 :       8192 : layers.2.attention_norm.weight      : [8192]          : torch.bfloat16
  27 :       8192 : layers.2.ffn_norm.weight            : [8192]          : torch.bfloat16
  28 :    8388608 : layers.3.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  29 :    1048576 : layers.3.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  30 :    1048576 : layers.3.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  31 :    8388608 : layers.3.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  32 :   29360128 : layers.3.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  33 :   29360128 : layers.3.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  34 :   29360128 : layers.3.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  35 :       8192 : layers.3.attention_norm.weight      : [8192]          : torch.bfloat16
  36 :       8192 : layers.3.ffn_norm.weight            : [8192]          : torch.bfloat16
  37 :    8388608 : layers.4.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  38 :    1048576 : layers.4.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  39 :    1048576 : layers.4.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  40 :    8388608 : layers.4.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  41 :   29360128 : layers.4.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  42 :   29360128 : layers.4.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  43 :   29360128 : layers.4.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  44 :       8192 : layers.4.attention_norm.weight      : [8192]          : torch.bfloat16
  45 :       8192 : layers.4.ffn_norm.weight            : [8192]          : torch.bfloat16
  46 :    8388608 : layers.5.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  47 :    1048576 : layers.5.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  48 :    1048576 : layers.5.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  49 :    8388608 : layers.5.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  50 :   29360128 : layers.5.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  51 :   29360128 : layers.5.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  52 :   29360128 : layers.5.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  53 :       8192 : layers.5.attention_norm.weight      : [8192]          : torch.bfloat16
  54 :       8192 : layers.5.ffn_norm.weight            : [8192]          : torch.bfloat16
  55 :    8388608 : layers.6.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  56 :    1048576 : layers.6.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  57 :    1048576 : layers.6.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  58 :    8388608 : layers.6.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  59 :   29360128 : layers.6.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  60 :   29360128 : layers.6.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  61 :   29360128 : layers.6.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  62 :       8192 : layers.6.attention_norm.weight      : [8192]          : torch.bfloat16
  63 :       8192 : layers.6.ffn_norm.weight            : [8192]          : torch.bfloat16
  64 :    8388608 : layers.7.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  65 :    1048576 : layers.7.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  66 :    1048576 : layers.7.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  67 :    8388608 : layers.7.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  68 :   29360128 : layers.7.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  69 :   29360128 : layers.7.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  70 :   29360128 : layers.7.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  71 :       8192 : layers.7.attention_norm.weight      : [8192]          : torch.bfloat16
  72 :       8192 : layers.7.ffn_norm.weight            : [8192]          : torch.bfloat16
  73 :    8388608 : layers.8.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  74 :    1048576 : layers.8.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  75 :    1048576 : layers.8.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  76 :    8388608 : layers.8.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  77 :   29360128 : layers.8.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  78 :   29360128 : layers.8.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  79 :   29360128 : layers.8.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  80 :       8192 : layers.8.attention_norm.weight      : [8192]          : torch.bfloat16
  81 :       8192 : layers.8.ffn_norm.weight            : [8192]          : torch.bfloat16
  82 :    8388608 : layers.9.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  83 :    1048576 : layers.9.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  84 :    1048576 : layers.9.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  85 :    8388608 : layers.9.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  86 :   29360128 : layers.9.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  87 :   29360128 : layers.9.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  88 :   29360128 : layers.9.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  89 :       8192 : layers.9.attention_norm.weight      : [8192]          : torch.bfloat16
  90 :       8192 : layers.9.ffn_norm.weight            : [8192]          : torch.bfloat16
  91 :    8388608 : layers.10.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
  92 :    1048576 : layers.10.attention.wk.weight       : [128, 8192]     : torch.bfloat16
  93 :    1048576 : layers.10.attention.wv.weight       : [128, 8192]     : torch.bfloat16
  94 :    8388608 : layers.10.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
  95 :   29360128 : layers.10.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
  96 :   29360128 : layers.10.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
  97 :   29360128 : layers.10.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
  98 :       8192 : layers.10.attention_norm.weight     : [8192]          : torch.bfloat16
  99 :       8192 : layers.10.ffn_norm.weight           : [8192]          : torch.bfloat16
 100 :    8388608 : layers.11.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 101 :    1048576 : layers.11.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 102 :    1048576 : layers.11.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 103 :    8388608 : layers.11.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 104 :   29360128 : layers.11.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 105 :   29360128 : layers.11.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 106 :   29360128 : layers.11.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 107 :       8192 : layers.11.attention_norm.weight     : [8192]          : torch.bfloat16
 108 :       8192 : layers.11.ffn_norm.weight           : [8192]          : torch.bfloat16
 109 :    8388608 : layers.12.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 110 :    1048576 : layers.12.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 111 :    1048576 : layers.12.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 112 :    8388608 : layers.12.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 113 :   29360128 : layers.12.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 114 :   29360128 : layers.12.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 115 :   29360128 : layers.12.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 116 :       8192 : layers.12.attention_norm.weight     : [8192]          : torch.bfloat16
 117 :       8192 : layers.12.ffn_norm.weight           : [8192]          : torch.bfloat16
 118 :    8388608 : layers.13.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 119 :    1048576 : layers.13.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 120 :    1048576 : layers.13.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 121 :    8388608 : layers.13.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 122 :   29360128 : layers.13.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 123 :   29360128 : layers.13.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 124 :   29360128 : layers.13.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 125 :       8192 : layers.13.attention_norm.weight     : [8192]          : torch.bfloat16
 126 :       8192 : layers.13.ffn_norm.weight           : [8192]          : torch.bfloat16
 127 :    8388608 : layers.14.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 128 :    1048576 : layers.14.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 129 :    1048576 : layers.14.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 130 :    8388608 : layers.14.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 131 :   29360128 : layers.14.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 132 :   29360128 : layers.14.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 133 :   29360128 : layers.14.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 134 :       8192 : layers.14.attention_norm.weight     : [8192]          : torch.bfloat16
 135 :       8192 : layers.14.ffn_norm.weight           : [8192]          : torch.bfloat16
 136 :    8388608 : layers.15.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 137 :    1048576 : layers.15.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 138 :    1048576 : layers.15.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 139 :    8388608 : layers.15.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 140 :   29360128 : layers.15.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 141 :   29360128 : layers.15.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 142 :   29360128 : layers.15.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 143 :       8192 : layers.15.attention_norm.weight     : [8192]          : torch.bfloat16
 144 :       8192 : layers.15.ffn_norm.weight           : [8192]          : torch.bfloat16
 145 :    8388608 : layers.16.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 146 :    1048576 : layers.16.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 147 :    1048576 : layers.16.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 148 :    8388608 : layers.16.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 149 :   29360128 : layers.16.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 150 :   29360128 : layers.16.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 151 :   29360128 : layers.16.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 152 :       8192 : layers.16.attention_norm.weight     : [8192]          : torch.bfloat16
 153 :       8192 : layers.16.ffn_norm.weight           : [8192]          : torch.bfloat16
 154 :    8388608 : layers.17.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 155 :    1048576 : layers.17.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 156 :    1048576 : layers.17.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 157 :    8388608 : layers.17.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 158 :   29360128 : layers.17.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 159 :   29360128 : layers.17.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 160 :   29360128 : layers.17.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 161 :       8192 : layers.17.attention_norm.weight     : [8192]          : torch.bfloat16
 162 :       8192 : layers.17.ffn_norm.weight           : [8192]          : torch.bfloat16
 163 :    8388608 : layers.18.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 164 :    1048576 : layers.18.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 165 :    1048576 : layers.18.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 166 :    8388608 : layers.18.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 167 :   29360128 : layers.18.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 168 :   29360128 : layers.18.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 169 :   29360128 : layers.18.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 170 :       8192 : layers.18.attention_norm.weight     : [8192]          : torch.bfloat16
 171 :       8192 : layers.18.ffn_norm.weight           : [8192]          : torch.bfloat16
 172 :    8388608 : layers.19.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 173 :    1048576 : layers.19.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 174 :    1048576 : layers.19.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 175 :    8388608 : layers.19.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 176 :   29360128 : layers.19.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 177 :   29360128 : layers.19.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 178 :   29360128 : layers.19.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 179 :       8192 : layers.19.attention_norm.weight     : [8192]          : torch.bfloat16
 180 :       8192 : layers.19.ffn_norm.weight           : [8192]          : torch.bfloat16
 181 :    8388608 : layers.20.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 182 :    1048576 : layers.20.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 183 :    1048576 : layers.20.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 184 :    8388608 : layers.20.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 185 :   29360128 : layers.20.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 186 :   29360128 : layers.20.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 187 :   29360128 : layers.20.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 188 :       8192 : layers.20.attention_norm.weight     : [8192]          : torch.bfloat16
 189 :       8192 : layers.20.ffn_norm.weight           : [8192]          : torch.bfloat16
 190 :    8388608 : layers.21.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 191 :    1048576 : layers.21.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 192 :    1048576 : layers.21.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 193 :    8388608 : layers.21.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 194 :   29360128 : layers.21.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 195 :   29360128 : layers.21.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 196 :   29360128 : layers.21.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 197 :       8192 : layers.21.attention_norm.weight     : [8192]          : torch.bfloat16
 198 :       8192 : layers.21.ffn_norm.weight           : [8192]          : torch.bfloat16
 199 :    8388608 : layers.22.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 200 :    1048576 : layers.22.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 201 :    1048576 : layers.22.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 202 :    8388608 : layers.22.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 203 :   29360128 : layers.22.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 204 :   29360128 : layers.22.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 205 :   29360128 : layers.22.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 206 :       8192 : layers.22.attention_norm.weight     : [8192]          : torch.bfloat16
 207 :       8192 : layers.22.ffn_norm.weight           : [8192]          : torch.bfloat16
 208 :    8388608 : layers.23.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 209 :    1048576 : layers.23.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 210 :    1048576 : layers.23.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 211 :    8388608 : layers.23.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 212 :   29360128 : layers.23.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 213 :   29360128 : layers.23.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 214 :   29360128 : layers.23.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 215 :       8192 : layers.23.attention_norm.weight     : [8192]          : torch.bfloat16
 216 :       8192 : layers.23.ffn_norm.weight           : [8192]          : torch.bfloat16
 217 :    8388608 : layers.24.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 218 :    1048576 : layers.24.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 219 :    1048576 : layers.24.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 220 :    8388608 : layers.24.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 221 :   29360128 : layers.24.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 222 :   29360128 : layers.24.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 223 :   29360128 : layers.24.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 224 :       8192 : layers.24.attention_norm.weight     : [8192]          : torch.bfloat16
 225 :       8192 : layers.24.ffn_norm.weight           : [8192]          : torch.bfloat16
 226 :    8388608 : layers.25.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 227 :    1048576 : layers.25.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 228 :    1048576 : layers.25.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 229 :    8388608 : layers.25.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 230 :   29360128 : layers.25.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 231 :   29360128 : layers.25.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 232 :   29360128 : layers.25.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 233 :       8192 : layers.25.attention_norm.weight     : [8192]          : torch.bfloat16
 234 :       8192 : layers.25.ffn_norm.weight           : [8192]          : torch.bfloat16
 235 :    8388608 : layers.26.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 236 :    1048576 : layers.26.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 237 :    1048576 : layers.26.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 238 :    8388608 : layers.26.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 239 :   29360128 : layers.26.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 240 :   29360128 : layers.26.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 241 :   29360128 : layers.26.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 242 :       8192 : layers.26.attention_norm.weight     : [8192]          : torch.bfloat16
 243 :       8192 : layers.26.ffn_norm.weight           : [8192]          : torch.bfloat16
 244 :    8388608 : layers.27.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 245 :    1048576 : layers.27.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 246 :    1048576 : layers.27.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 247 :    8388608 : layers.27.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 248 :   29360128 : layers.27.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 249 :   29360128 : layers.27.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 250 :   29360128 : layers.27.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 251 :       8192 : layers.27.attention_norm.weight     : [8192]          : torch.bfloat16
 252 :       8192 : layers.27.ffn_norm.weight           : [8192]          : torch.bfloat16
 253 :    8388608 : layers.28.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 254 :    1048576 : layers.28.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 255 :    1048576 : layers.28.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 256 :    8388608 : layers.28.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 257 :   29360128 : layers.28.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 258 :   29360128 : layers.28.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 259 :   29360128 : layers.28.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 260 :       8192 : layers.28.attention_norm.weight     : [8192]          : torch.bfloat16
 261 :       8192 : layers.28.ffn_norm.weight           : [8192]          : torch.bfloat16
 262 :    8388608 : layers.29.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 263 :    1048576 : layers.29.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 264 :    1048576 : layers.29.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 265 :    8388608 : layers.29.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 266 :   29360128 : layers.29.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 267 :   29360128 : layers.29.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 268 :   29360128 : layers.29.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 269 :       8192 : layers.29.attention_norm.weight     : [8192]          : torch.bfloat16
 270 :       8192 : layers.29.ffn_norm.weight           : [8192]          : torch.bfloat16
 271 :    8388608 : layers.30.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 272 :    1048576 : layers.30.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 273 :    1048576 : layers.30.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 274 :    8388608 : layers.30.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 275 :   29360128 : layers.30.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 276 :   29360128 : layers.30.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 277 :   29360128 : layers.30.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 278 :       8192 : layers.30.attention_norm.weight     : [8192]          : torch.bfloat16
 279 :       8192 : layers.30.ffn_norm.weight           : [8192]          : torch.bfloat16
 280 :    8388608 : layers.31.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 281 :    1048576 : layers.31.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 282 :    1048576 : layers.31.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 283 :    8388608 : layers.31.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 284 :   29360128 : layers.31.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 285 :   29360128 : layers.31.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 286 :   29360128 : layers.31.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 287 :       8192 : layers.31.attention_norm.weight     : [8192]          : torch.bfloat16
 288 :       8192 : layers.31.ffn_norm.weight           : [8192]          : torch.bfloat16
 289 :    8388608 : layers.32.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 290 :    1048576 : layers.32.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 291 :    1048576 : layers.32.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 292 :    8388608 : layers.32.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 293 :   29360128 : layers.32.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 294 :   29360128 : layers.32.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 295 :   29360128 : layers.32.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 296 :       8192 : layers.32.attention_norm.weight     : [8192]          : torch.bfloat16
 297 :       8192 : layers.32.ffn_norm.weight           : [8192]          : torch.bfloat16
 298 :    8388608 : layers.33.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 299 :    1048576 : layers.33.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 300 :    1048576 : layers.33.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 301 :    8388608 : layers.33.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 302 :   29360128 : layers.33.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 303 :   29360128 : layers.33.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 304 :   29360128 : layers.33.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 305 :       8192 : layers.33.attention_norm.weight     : [8192]          : torch.bfloat16
 306 :       8192 : layers.33.ffn_norm.weight           : [8192]          : torch.bfloat16
 307 :    8388608 : layers.34.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 308 :    1048576 : layers.34.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 309 :    1048576 : layers.34.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 310 :    8388608 : layers.34.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 311 :   29360128 : layers.34.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 312 :   29360128 : layers.34.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 313 :   29360128 : layers.34.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 314 :       8192 : layers.34.attention_norm.weight     : [8192]          : torch.bfloat16
 315 :       8192 : layers.34.ffn_norm.weight           : [8192]          : torch.bfloat16
 316 :    8388608 : layers.35.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 317 :    1048576 : layers.35.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 318 :    1048576 : layers.35.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 319 :    8388608 : layers.35.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 320 :   29360128 : layers.35.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 321 :   29360128 : layers.35.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 322 :   29360128 : layers.35.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 323 :       8192 : layers.35.attention_norm.weight     : [8192]          : torch.bfloat16
 324 :       8192 : layers.35.ffn_norm.weight           : [8192]          : torch.bfloat16
 325 :    8388608 : layers.36.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 326 :    1048576 : layers.36.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 327 :    1048576 : layers.36.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 328 :    8388608 : layers.36.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 329 :   29360128 : layers.36.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 330 :   29360128 : layers.36.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 331 :   29360128 : layers.36.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 332 :       8192 : layers.36.attention_norm.weight     : [8192]          : torch.bfloat16
 333 :       8192 : layers.36.ffn_norm.weight           : [8192]          : torch.bfloat16
 334 :    8388608 : layers.37.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 335 :    1048576 : layers.37.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 336 :    1048576 : layers.37.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 337 :    8388608 : layers.37.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 338 :   29360128 : layers.37.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 339 :   29360128 : layers.37.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 340 :   29360128 : layers.37.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 341 :       8192 : layers.37.attention_norm.weight     : [8192]          : torch.bfloat16
 342 :       8192 : layers.37.ffn_norm.weight           : [8192]          : torch.bfloat16
 343 :    8388608 : layers.38.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 344 :    1048576 : layers.38.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 345 :    1048576 : layers.38.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 346 :    8388608 : layers.38.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 347 :   29360128 : layers.38.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 348 :   29360128 : layers.38.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 349 :   29360128 : layers.38.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 350 :       8192 : layers.38.attention_norm.weight     : [8192]          : torch.bfloat16
 351 :       8192 : layers.38.ffn_norm.weight           : [8192]          : torch.bfloat16
 352 :    8388608 : layers.39.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 353 :    1048576 : layers.39.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 354 :    1048576 : layers.39.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 355 :    8388608 : layers.39.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 356 :   29360128 : layers.39.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 357 :   29360128 : layers.39.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 358 :   29360128 : layers.39.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 359 :       8192 : layers.39.attention_norm.weight     : [8192]          : torch.bfloat16
 360 :       8192 : layers.39.ffn_norm.weight           : [8192]          : torch.bfloat16
 361 :    8388608 : layers.40.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 362 :    1048576 : layers.40.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 363 :    1048576 : layers.40.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 364 :    8388608 : layers.40.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 365 :   29360128 : layers.40.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 366 :   29360128 : layers.40.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 367 :   29360128 : layers.40.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 368 :       8192 : layers.40.attention_norm.weight     : [8192]          : torch.bfloat16
 369 :       8192 : layers.40.ffn_norm.weight           : [8192]          : torch.bfloat16
 370 :    8388608 : layers.41.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 371 :    1048576 : layers.41.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 372 :    1048576 : layers.41.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 373 :    8388608 : layers.41.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 374 :   29360128 : layers.41.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 375 :   29360128 : layers.41.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 376 :   29360128 : layers.41.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 377 :       8192 : layers.41.attention_norm.weight     : [8192]          : torch.bfloat16
 378 :       8192 : layers.41.ffn_norm.weight           : [8192]          : torch.bfloat16
 379 :    8388608 : layers.42.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 380 :    1048576 : layers.42.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 381 :    1048576 : layers.42.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 382 :    8388608 : layers.42.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 383 :   29360128 : layers.42.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 384 :   29360128 : layers.42.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 385 :   29360128 : layers.42.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 386 :       8192 : layers.42.attention_norm.weight     : [8192]          : torch.bfloat16
 387 :       8192 : layers.42.ffn_norm.weight           : [8192]          : torch.bfloat16
 388 :    8388608 : layers.43.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 389 :    1048576 : layers.43.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 390 :    1048576 : layers.43.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 391 :    8388608 : layers.43.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 392 :   29360128 : layers.43.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 393 :   29360128 : layers.43.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 394 :   29360128 : layers.43.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 395 :       8192 : layers.43.attention_norm.weight     : [8192]          : torch.bfloat16
 396 :       8192 : layers.43.ffn_norm.weight           : [8192]          : torch.bfloat16
 397 :    8388608 : layers.44.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 398 :    1048576 : layers.44.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 399 :    1048576 : layers.44.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 400 :    8388608 : layers.44.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 401 :   29360128 : layers.44.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 402 :   29360128 : layers.44.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 403 :   29360128 : layers.44.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 404 :       8192 : layers.44.attention_norm.weight     : [8192]          : torch.bfloat16
 405 :       8192 : layers.44.ffn_norm.weight           : [8192]          : torch.bfloat16
 406 :    8388608 : layers.45.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 407 :    1048576 : layers.45.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 408 :    1048576 : layers.45.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 409 :    8388608 : layers.45.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 410 :   29360128 : layers.45.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 411 :   29360128 : layers.45.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 412 :   29360128 : layers.45.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 413 :       8192 : layers.45.attention_norm.weight     : [8192]          : torch.bfloat16
 414 :       8192 : layers.45.ffn_norm.weight           : [8192]          : torch.bfloat16
 415 :    8388608 : layers.46.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 416 :    1048576 : layers.46.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 417 :    1048576 : layers.46.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 418 :    8388608 : layers.46.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 419 :   29360128 : layers.46.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 420 :   29360128 : layers.46.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 421 :   29360128 : layers.46.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 422 :       8192 : layers.46.attention_norm.weight     : [8192]          : torch.bfloat16
 423 :       8192 : layers.46.ffn_norm.weight           : [8192]          : torch.bfloat16
 424 :    8388608 : layers.47.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 425 :    1048576 : layers.47.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 426 :    1048576 : layers.47.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 427 :    8388608 : layers.47.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 428 :   29360128 : layers.47.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 429 :   29360128 : layers.47.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 430 :   29360128 : layers.47.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 431 :       8192 : layers.47.attention_norm.weight     : [8192]          : torch.bfloat16
 432 :       8192 : layers.47.ffn_norm.weight           : [8192]          : torch.bfloat16
 433 :    8388608 : layers.48.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 434 :    1048576 : layers.48.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 435 :    1048576 : layers.48.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 436 :    8388608 : layers.48.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 437 :   29360128 : layers.48.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 438 :   29360128 : layers.48.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 439 :   29360128 : layers.48.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 440 :       8192 : layers.48.attention_norm.weight     : [8192]          : torch.bfloat16
 441 :       8192 : layers.48.ffn_norm.weight           : [8192]          : torch.bfloat16
 442 :    8388608 : layers.49.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 443 :    1048576 : layers.49.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 444 :    1048576 : layers.49.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 445 :    8388608 : layers.49.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 446 :   29360128 : layers.49.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 447 :   29360128 : layers.49.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 448 :   29360128 : layers.49.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 449 :       8192 : layers.49.attention_norm.weight     : [8192]          : torch.bfloat16
 450 :       8192 : layers.49.ffn_norm.weight           : [8192]          : torch.bfloat16
 451 :    8388608 : layers.50.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 452 :    1048576 : layers.50.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 453 :    1048576 : layers.50.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 454 :    8388608 : layers.50.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 455 :   29360128 : layers.50.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 456 :   29360128 : layers.50.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 457 :   29360128 : layers.50.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 458 :       8192 : layers.50.attention_norm.weight     : [8192]          : torch.bfloat16
 459 :       8192 : layers.50.ffn_norm.weight           : [8192]          : torch.bfloat16
 460 :    8388608 : layers.51.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 461 :    1048576 : layers.51.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 462 :    1048576 : layers.51.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 463 :    8388608 : layers.51.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 464 :   29360128 : layers.51.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 465 :   29360128 : layers.51.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 466 :   29360128 : layers.51.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 467 :       8192 : layers.51.attention_norm.weight     : [8192]          : torch.bfloat16
 468 :       8192 : layers.51.ffn_norm.weight           : [8192]          : torch.bfloat16
 469 :    8388608 : layers.52.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 470 :    1048576 : layers.52.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 471 :    1048576 : layers.52.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 472 :    8388608 : layers.52.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 473 :   29360128 : layers.52.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 474 :   29360128 : layers.52.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 475 :   29360128 : layers.52.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 476 :       8192 : layers.52.attention_norm.weight     : [8192]          : torch.bfloat16
 477 :       8192 : layers.52.ffn_norm.weight           : [8192]          : torch.bfloat16
 478 :    8388608 : layers.53.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 479 :    1048576 : layers.53.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 480 :    1048576 : layers.53.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 481 :    8388608 : layers.53.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 482 :   29360128 : layers.53.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 483 :   29360128 : layers.53.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 484 :   29360128 : layers.53.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 485 :       8192 : layers.53.attention_norm.weight     : [8192]          : torch.bfloat16
 486 :       8192 : layers.53.ffn_norm.weight           : [8192]          : torch.bfloat16
 487 :    8388608 : layers.54.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 488 :    1048576 : layers.54.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 489 :    1048576 : layers.54.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 490 :    8388608 : layers.54.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 491 :   29360128 : layers.54.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 492 :   29360128 : layers.54.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 493 :   29360128 : layers.54.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 494 :       8192 : layers.54.attention_norm.weight     : [8192]          : torch.bfloat16
 495 :       8192 : layers.54.ffn_norm.weight           : [8192]          : torch.bfloat16
 496 :    8388608 : layers.55.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 497 :    1048576 : layers.55.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 498 :    1048576 : layers.55.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 499 :    8388608 : layers.55.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 500 :   29360128 : layers.55.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 501 :   29360128 : layers.55.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 502 :   29360128 : layers.55.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 503 :       8192 : layers.55.attention_norm.weight     : [8192]          : torch.bfloat16
 504 :       8192 : layers.55.ffn_norm.weight           : [8192]          : torch.bfloat16
 505 :    8388608 : layers.56.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 506 :    1048576 : layers.56.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 507 :    1048576 : layers.56.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 508 :    8388608 : layers.56.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 509 :   29360128 : layers.56.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 510 :   29360128 : layers.56.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 511 :   29360128 : layers.56.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 512 :       8192 : layers.56.attention_norm.weight     : [8192]          : torch.bfloat16
 513 :       8192 : layers.56.ffn_norm.weight           : [8192]          : torch.bfloat16
 514 :    8388608 : layers.57.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 515 :    1048576 : layers.57.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 516 :    1048576 : layers.57.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 517 :    8388608 : layers.57.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 518 :   29360128 : layers.57.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 519 :   29360128 : layers.57.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 520 :   29360128 : layers.57.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 521 :       8192 : layers.57.attention_norm.weight     : [8192]          : torch.bfloat16
 522 :       8192 : layers.57.ffn_norm.weight           : [8192]          : torch.bfloat16
 523 :    8388608 : layers.58.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 524 :    1048576 : layers.58.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 525 :    1048576 : layers.58.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 526 :    8388608 : layers.58.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 527 :   29360128 : layers.58.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 528 :   29360128 : layers.58.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 529 :   29360128 : layers.58.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 530 :       8192 : layers.58.attention_norm.weight     : [8192]          : torch.bfloat16
 531 :       8192 : layers.58.ffn_norm.weight           : [8192]          : torch.bfloat16
 532 :    8388608 : layers.59.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 533 :    1048576 : layers.59.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 534 :    1048576 : layers.59.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 535 :    8388608 : layers.59.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 536 :   29360128 : layers.59.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 537 :   29360128 : layers.59.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 538 :   29360128 : layers.59.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 539 :       8192 : layers.59.attention_norm.weight     : [8192]          : torch.bfloat16
 540 :       8192 : layers.59.ffn_norm.weight           : [8192]          : torch.bfloat16
 541 :    8388608 : layers.60.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 542 :    1048576 : layers.60.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 543 :    1048576 : layers.60.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 544 :    8388608 : layers.60.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 545 :   29360128 : layers.60.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 546 :   29360128 : layers.60.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 547 :   29360128 : layers.60.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 548 :       8192 : layers.60.attention_norm.weight     : [8192]          : torch.bfloat16
 549 :       8192 : layers.60.ffn_norm.weight           : [8192]          : torch.bfloat16
 550 :    8388608 : layers.61.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 551 :    1048576 : layers.61.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 552 :    1048576 : layers.61.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 553 :    8388608 : layers.61.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 554 :   29360128 : layers.61.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 555 :   29360128 : layers.61.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 556 :   29360128 : layers.61.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 557 :       8192 : layers.61.attention_norm.weight     : [8192]          : torch.bfloat16
 558 :       8192 : layers.61.ffn_norm.weight           : [8192]          : torch.bfloat16
 559 :    8388608 : layers.62.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 560 :    1048576 : layers.62.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 561 :    1048576 : layers.62.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 562 :    8388608 : layers.62.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 563 :   29360128 : layers.62.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 564 :   29360128 : layers.62.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 565 :   29360128 : layers.62.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 566 :       8192 : layers.62.attention_norm.weight     : [8192]          : torch.bfloat16
 567 :       8192 : layers.62.ffn_norm.weight           : [8192]          : torch.bfloat16
 568 :    8388608 : layers.63.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 569 :    1048576 : layers.63.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 570 :    1048576 : layers.63.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 571 :    8388608 : layers.63.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 572 :   29360128 : layers.63.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 573 :   29360128 : layers.63.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 574 :   29360128 : layers.63.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 575 :       8192 : layers.63.attention_norm.weight     : [8192]          : torch.bfloat16
 576 :       8192 : layers.63.ffn_norm.weight           : [8192]          : torch.bfloat16
 577 :    8388608 : layers.64.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 578 :    1048576 : layers.64.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 579 :    1048576 : layers.64.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 580 :    8388608 : layers.64.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 581 :   29360128 : layers.64.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 582 :   29360128 : layers.64.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 583 :   29360128 : layers.64.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 584 :       8192 : layers.64.attention_norm.weight     : [8192]          : torch.bfloat16
 585 :       8192 : layers.64.ffn_norm.weight           : [8192]          : torch.bfloat16
 586 :    8388608 : layers.65.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 587 :    1048576 : layers.65.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 588 :    1048576 : layers.65.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 589 :    8388608 : layers.65.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 590 :   29360128 : layers.65.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 591 :   29360128 : layers.65.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 592 :   29360128 : layers.65.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 593 :       8192 : layers.65.attention_norm.weight     : [8192]          : torch.bfloat16
 594 :       8192 : layers.65.ffn_norm.weight           : [8192]          : torch.bfloat16
 595 :    8388608 : layers.66.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 596 :    1048576 : layers.66.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 597 :    1048576 : layers.66.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 598 :    8388608 : layers.66.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 599 :   29360128 : layers.66.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 600 :   29360128 : layers.66.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 601 :   29360128 : layers.66.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 602 :       8192 : layers.66.attention_norm.weight     : [8192]          : torch.bfloat16
 603 :       8192 : layers.66.ffn_norm.weight           : [8192]          : torch.bfloat16
 604 :    8388608 : layers.67.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 605 :    1048576 : layers.67.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 606 :    1048576 : layers.67.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 607 :    8388608 : layers.67.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 608 :   29360128 : layers.67.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 609 :   29360128 : layers.67.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 610 :   29360128 : layers.67.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 611 :       8192 : layers.67.attention_norm.weight     : [8192]          : torch.bfloat16
 612 :       8192 : layers.67.ffn_norm.weight           : [8192]          : torch.bfloat16
 613 :    8388608 : layers.68.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 614 :    1048576 : layers.68.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 615 :    1048576 : layers.68.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 616 :    8388608 : layers.68.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 617 :   29360128 : layers.68.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 618 :   29360128 : layers.68.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 619 :   29360128 : layers.68.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 620 :       8192 : layers.68.attention_norm.weight     : [8192]          : torch.bfloat16
 621 :       8192 : layers.68.ffn_norm.weight           : [8192]          : torch.bfloat16
 622 :    8388608 : layers.69.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 623 :    1048576 : layers.69.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 624 :    1048576 : layers.69.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 625 :    8388608 : layers.69.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 626 :   29360128 : layers.69.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 627 :   29360128 : layers.69.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 628 :   29360128 : layers.69.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 629 :       8192 : layers.69.attention_norm.weight     : [8192]          : torch.bfloat16
 630 :       8192 : layers.69.ffn_norm.weight           : [8192]          : torch.bfloat16
 631 :    8388608 : layers.70.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 632 :    1048576 : layers.70.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 633 :    1048576 : layers.70.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 634 :    8388608 : layers.70.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 635 :   29360128 : layers.70.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 636 :   29360128 : layers.70.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 637 :   29360128 : layers.70.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 638 :       8192 : layers.70.attention_norm.weight     : [8192]          : torch.bfloat16
 639 :       8192 : layers.70.ffn_norm.weight           : [8192]          : torch.bfloat16
 640 :    8388608 : layers.71.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 641 :    1048576 : layers.71.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 642 :    1048576 : layers.71.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 643 :    8388608 : layers.71.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 644 :   29360128 : layers.71.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 645 :   29360128 : layers.71.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 646 :   29360128 : layers.71.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 647 :       8192 : layers.71.attention_norm.weight     : [8192]          : torch.bfloat16
 648 :       8192 : layers.71.ffn_norm.weight           : [8192]          : torch.bfloat16
 649 :    8388608 : layers.72.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 650 :    1048576 : layers.72.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 651 :    1048576 : layers.72.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 652 :    8388608 : layers.72.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 653 :   29360128 : layers.72.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 654 :   29360128 : layers.72.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 655 :   29360128 : layers.72.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 656 :       8192 : layers.72.attention_norm.weight     : [8192]          : torch.bfloat16
 657 :       8192 : layers.72.ffn_norm.weight           : [8192]          : torch.bfloat16
 658 :    8388608 : layers.73.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 659 :    1048576 : layers.73.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 660 :    1048576 : layers.73.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 661 :    8388608 : layers.73.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 662 :   29360128 : layers.73.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 663 :   29360128 : layers.73.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 664 :   29360128 : layers.73.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 665 :       8192 : layers.73.attention_norm.weight     : [8192]          : torch.bfloat16
 666 :       8192 : layers.73.ffn_norm.weight           : [8192]          : torch.bfloat16
 667 :    8388608 : layers.74.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 668 :    1048576 : layers.74.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 669 :    1048576 : layers.74.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 670 :    8388608 : layers.74.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 671 :   29360128 : layers.74.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 672 :   29360128 : layers.74.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 673 :   29360128 : layers.74.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 674 :       8192 : layers.74.attention_norm.weight     : [8192]          : torch.bfloat16
 675 :       8192 : layers.74.ffn_norm.weight           : [8192]          : torch.bfloat16
 676 :    8388608 : layers.75.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 677 :    1048576 : layers.75.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 678 :    1048576 : layers.75.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 679 :    8388608 : layers.75.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 680 :   29360128 : layers.75.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 681 :   29360128 : layers.75.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 682 :   29360128 : layers.75.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 683 :       8192 : layers.75.attention_norm.weight     : [8192]          : torch.bfloat16
 684 :       8192 : layers.75.ffn_norm.weight           : [8192]          : torch.bfloat16
 685 :    8388608 : layers.76.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 686 :    1048576 : layers.76.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 687 :    1048576 : layers.76.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 688 :    8388608 : layers.76.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 689 :   29360128 : layers.76.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 690 :   29360128 : layers.76.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 691 :   29360128 : layers.76.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 692 :       8192 : layers.76.attention_norm.weight     : [8192]          : torch.bfloat16
 693 :       8192 : layers.76.ffn_norm.weight           : [8192]          : torch.bfloat16
 694 :    8388608 : layers.77.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 695 :    1048576 : layers.77.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 696 :    1048576 : layers.77.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 697 :    8388608 : layers.77.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 698 :   29360128 : layers.77.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 699 :   29360128 : layers.77.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 700 :   29360128 : layers.77.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 701 :       8192 : layers.77.attention_norm.weight     : [8192]          : torch.bfloat16
 702 :       8192 : layers.77.ffn_norm.weight           : [8192]          : torch.bfloat16
 703 :    8388608 : layers.78.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 704 :    1048576 : layers.78.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 705 :    1048576 : layers.78.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 706 :    8388608 : layers.78.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 707 :   29360128 : layers.78.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 708 :   29360128 : layers.78.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 709 :   29360128 : layers.78.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 710 :       8192 : layers.78.attention_norm.weight     : [8192]          : torch.bfloat16
 711 :       8192 : layers.78.ffn_norm.weight           : [8192]          : torch.bfloat16
 712 :    8388608 : layers.79.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 713 :    1048576 : layers.79.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 714 :    1048576 : layers.79.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 715 :    8388608 : layers.79.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 716 :   29360128 : layers.79.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 717 :   29360128 : layers.79.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 718 :   29360128 : layers.79.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 719 :       8192 : layers.79.attention_norm.weight     : [8192]          : torch.bfloat16
 720 :       8192 : layers.79.ffn_norm.weight           : [8192]          : torch.bfloat16
 721 :       8192 : norm.weight                         : [8192]          : torch.bfloat16
 722 :  131334144 : output.weight                       : [16032, 8192]   : torch.bfloat16

-----------------------------------------------------------------------------
52922204160 params in total.
105844408320 bytes in total.
35.50 sec, 5.41 sec, 2843.42 MB/s
-----------------------------------------------------------------------------

[7/8]: Loading original/consolidated.06.pth
   0 :  131334144 : tok_embeddings.weight               : [16032, 8192]   : torch.bfloat16
   1 :    8388608 : layers.0.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
   2 :    1048576 : layers.0.attention.wk.weight        : [128, 8192]     : torch.bfloat16
   3 :    1048576 : layers.0.attention.wv.weight        : [128, 8192]     : torch.bfloat16
   4 :    8388608 : layers.0.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
   5 :   29360128 : layers.0.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
   6 :   29360128 : layers.0.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
   7 :   29360128 : layers.0.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
   8 :       8192 : layers.0.attention_norm.weight      : [8192]          : torch.bfloat16
   9 :       8192 : layers.0.ffn_norm.weight            : [8192]          : torch.bfloat16
  10 :    8388608 : layers.1.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  11 :    1048576 : layers.1.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  12 :    1048576 : layers.1.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  13 :    8388608 : layers.1.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  14 :   29360128 : layers.1.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  15 :   29360128 : layers.1.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  16 :   29360128 : layers.1.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  17 :       8192 : layers.1.attention_norm.weight      : [8192]          : torch.bfloat16
  18 :       8192 : layers.1.ffn_norm.weight            : [8192]          : torch.bfloat16
  19 :    8388608 : layers.2.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  20 :    1048576 : layers.2.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  21 :    1048576 : layers.2.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  22 :    8388608 : layers.2.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  23 :   29360128 : layers.2.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  24 :   29360128 : layers.2.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  25 :   29360128 : layers.2.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  26 :       8192 : layers.2.attention_norm.weight      : [8192]          : torch.bfloat16
  27 :       8192 : layers.2.ffn_norm.weight            : [8192]          : torch.bfloat16
  28 :    8388608 : layers.3.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  29 :    1048576 : layers.3.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  30 :    1048576 : layers.3.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  31 :    8388608 : layers.3.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  32 :   29360128 : layers.3.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  33 :   29360128 : layers.3.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  34 :   29360128 : layers.3.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  35 :       8192 : layers.3.attention_norm.weight      : [8192]          : torch.bfloat16
  36 :       8192 : layers.3.ffn_norm.weight            : [8192]          : torch.bfloat16
  37 :    8388608 : layers.4.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  38 :    1048576 : layers.4.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  39 :    1048576 : layers.4.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  40 :    8388608 : layers.4.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  41 :   29360128 : layers.4.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  42 :   29360128 : layers.4.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  43 :   29360128 : layers.4.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  44 :       8192 : layers.4.attention_norm.weight      : [8192]          : torch.bfloat16
  45 :       8192 : layers.4.ffn_norm.weight            : [8192]          : torch.bfloat16
  46 :    8388608 : layers.5.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  47 :    1048576 : layers.5.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  48 :    1048576 : layers.5.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  49 :    8388608 : layers.5.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  50 :   29360128 : layers.5.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  51 :   29360128 : layers.5.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  52 :   29360128 : layers.5.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  53 :       8192 : layers.5.attention_norm.weight      : [8192]          : torch.bfloat16
  54 :       8192 : layers.5.ffn_norm.weight            : [8192]          : torch.bfloat16
  55 :    8388608 : layers.6.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  56 :    1048576 : layers.6.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  57 :    1048576 : layers.6.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  58 :    8388608 : layers.6.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  59 :   29360128 : layers.6.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  60 :   29360128 : layers.6.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  61 :   29360128 : layers.6.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  62 :       8192 : layers.6.attention_norm.weight      : [8192]          : torch.bfloat16
  63 :       8192 : layers.6.ffn_norm.weight            : [8192]          : torch.bfloat16
  64 :    8388608 : layers.7.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  65 :    1048576 : layers.7.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  66 :    1048576 : layers.7.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  67 :    8388608 : layers.7.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  68 :   29360128 : layers.7.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  69 :   29360128 : layers.7.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  70 :   29360128 : layers.7.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  71 :       8192 : layers.7.attention_norm.weight      : [8192]          : torch.bfloat16
  72 :       8192 : layers.7.ffn_norm.weight            : [8192]          : torch.bfloat16
  73 :    8388608 : layers.8.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  74 :    1048576 : layers.8.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  75 :    1048576 : layers.8.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  76 :    8388608 : layers.8.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  77 :   29360128 : layers.8.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  78 :   29360128 : layers.8.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  79 :   29360128 : layers.8.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  80 :       8192 : layers.8.attention_norm.weight      : [8192]          : torch.bfloat16
  81 :       8192 : layers.8.ffn_norm.weight            : [8192]          : torch.bfloat16
  82 :    8388608 : layers.9.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  83 :    1048576 : layers.9.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  84 :    1048576 : layers.9.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  85 :    8388608 : layers.9.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  86 :   29360128 : layers.9.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  87 :   29360128 : layers.9.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  88 :   29360128 : layers.9.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  89 :       8192 : layers.9.attention_norm.weight      : [8192]          : torch.bfloat16
  90 :       8192 : layers.9.ffn_norm.weight            : [8192]          : torch.bfloat16
  91 :    8388608 : layers.10.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
  92 :    1048576 : layers.10.attention.wk.weight       : [128, 8192]     : torch.bfloat16
  93 :    1048576 : layers.10.attention.wv.weight       : [128, 8192]     : torch.bfloat16
  94 :    8388608 : layers.10.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
  95 :   29360128 : layers.10.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
  96 :   29360128 : layers.10.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
  97 :   29360128 : layers.10.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
  98 :       8192 : layers.10.attention_norm.weight     : [8192]          : torch.bfloat16
  99 :       8192 : layers.10.ffn_norm.weight           : [8192]          : torch.bfloat16
 100 :    8388608 : layers.11.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 101 :    1048576 : layers.11.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 102 :    1048576 : layers.11.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 103 :    8388608 : layers.11.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 104 :   29360128 : layers.11.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 105 :   29360128 : layers.11.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 106 :   29360128 : layers.11.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 107 :       8192 : layers.11.attention_norm.weight     : [8192]          : torch.bfloat16
 108 :       8192 : layers.11.ffn_norm.weight           : [8192]          : torch.bfloat16
 109 :    8388608 : layers.12.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 110 :    1048576 : layers.12.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 111 :    1048576 : layers.12.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 112 :    8388608 : layers.12.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 113 :   29360128 : layers.12.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 114 :   29360128 : layers.12.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 115 :   29360128 : layers.12.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 116 :       8192 : layers.12.attention_norm.weight     : [8192]          : torch.bfloat16
 117 :       8192 : layers.12.ffn_norm.weight           : [8192]          : torch.bfloat16
 118 :    8388608 : layers.13.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 119 :    1048576 : layers.13.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 120 :    1048576 : layers.13.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 121 :    8388608 : layers.13.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 122 :   29360128 : layers.13.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 123 :   29360128 : layers.13.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 124 :   29360128 : layers.13.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 125 :       8192 : layers.13.attention_norm.weight     : [8192]          : torch.bfloat16
 126 :       8192 : layers.13.ffn_norm.weight           : [8192]          : torch.bfloat16
 127 :    8388608 : layers.14.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 128 :    1048576 : layers.14.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 129 :    1048576 : layers.14.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 130 :    8388608 : layers.14.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 131 :   29360128 : layers.14.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 132 :   29360128 : layers.14.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 133 :   29360128 : layers.14.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 134 :       8192 : layers.14.attention_norm.weight     : [8192]          : torch.bfloat16
 135 :       8192 : layers.14.ffn_norm.weight           : [8192]          : torch.bfloat16
 136 :    8388608 : layers.15.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 137 :    1048576 : layers.15.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 138 :    1048576 : layers.15.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 139 :    8388608 : layers.15.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 140 :   29360128 : layers.15.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 141 :   29360128 : layers.15.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 142 :   29360128 : layers.15.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 143 :       8192 : layers.15.attention_norm.weight     : [8192]          : torch.bfloat16
 144 :       8192 : layers.15.ffn_norm.weight           : [8192]          : torch.bfloat16
 145 :    8388608 : layers.16.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 146 :    1048576 : layers.16.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 147 :    1048576 : layers.16.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 148 :    8388608 : layers.16.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 149 :   29360128 : layers.16.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 150 :   29360128 : layers.16.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 151 :   29360128 : layers.16.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 152 :       8192 : layers.16.attention_norm.weight     : [8192]          : torch.bfloat16
 153 :       8192 : layers.16.ffn_norm.weight           : [8192]          : torch.bfloat16
 154 :    8388608 : layers.17.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 155 :    1048576 : layers.17.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 156 :    1048576 : layers.17.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 157 :    8388608 : layers.17.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 158 :   29360128 : layers.17.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 159 :   29360128 : layers.17.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 160 :   29360128 : layers.17.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 161 :       8192 : layers.17.attention_norm.weight     : [8192]          : torch.bfloat16
 162 :       8192 : layers.17.ffn_norm.weight           : [8192]          : torch.bfloat16
 163 :    8388608 : layers.18.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 164 :    1048576 : layers.18.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 165 :    1048576 : layers.18.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 166 :    8388608 : layers.18.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 167 :   29360128 : layers.18.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 168 :   29360128 : layers.18.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 169 :   29360128 : layers.18.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 170 :       8192 : layers.18.attention_norm.weight     : [8192]          : torch.bfloat16
 171 :       8192 : layers.18.ffn_norm.weight           : [8192]          : torch.bfloat16
 172 :    8388608 : layers.19.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 173 :    1048576 : layers.19.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 174 :    1048576 : layers.19.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 175 :    8388608 : layers.19.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 176 :   29360128 : layers.19.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 177 :   29360128 : layers.19.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 178 :   29360128 : layers.19.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 179 :       8192 : layers.19.attention_norm.weight     : [8192]          : torch.bfloat16
 180 :       8192 : layers.19.ffn_norm.weight           : [8192]          : torch.bfloat16
 181 :    8388608 : layers.20.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 182 :    1048576 : layers.20.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 183 :    1048576 : layers.20.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 184 :    8388608 : layers.20.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 185 :   29360128 : layers.20.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 186 :   29360128 : layers.20.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 187 :   29360128 : layers.20.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 188 :       8192 : layers.20.attention_norm.weight     : [8192]          : torch.bfloat16
 189 :       8192 : layers.20.ffn_norm.weight           : [8192]          : torch.bfloat16
 190 :    8388608 : layers.21.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 191 :    1048576 : layers.21.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 192 :    1048576 : layers.21.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 193 :    8388608 : layers.21.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 194 :   29360128 : layers.21.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 195 :   29360128 : layers.21.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 196 :   29360128 : layers.21.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 197 :       8192 : layers.21.attention_norm.weight     : [8192]          : torch.bfloat16
 198 :       8192 : layers.21.ffn_norm.weight           : [8192]          : torch.bfloat16
 199 :    8388608 : layers.22.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 200 :    1048576 : layers.22.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 201 :    1048576 : layers.22.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 202 :    8388608 : layers.22.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 203 :   29360128 : layers.22.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 204 :   29360128 : layers.22.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 205 :   29360128 : layers.22.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 206 :       8192 : layers.22.attention_norm.weight     : [8192]          : torch.bfloat16
 207 :       8192 : layers.22.ffn_norm.weight           : [8192]          : torch.bfloat16
 208 :    8388608 : layers.23.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 209 :    1048576 : layers.23.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 210 :    1048576 : layers.23.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 211 :    8388608 : layers.23.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 212 :   29360128 : layers.23.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 213 :   29360128 : layers.23.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 214 :   29360128 : layers.23.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 215 :       8192 : layers.23.attention_norm.weight     : [8192]          : torch.bfloat16
 216 :       8192 : layers.23.ffn_norm.weight           : [8192]          : torch.bfloat16
 217 :    8388608 : layers.24.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 218 :    1048576 : layers.24.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 219 :    1048576 : layers.24.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 220 :    8388608 : layers.24.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 221 :   29360128 : layers.24.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 222 :   29360128 : layers.24.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 223 :   29360128 : layers.24.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 224 :       8192 : layers.24.attention_norm.weight     : [8192]          : torch.bfloat16
 225 :       8192 : layers.24.ffn_norm.weight           : [8192]          : torch.bfloat16
 226 :    8388608 : layers.25.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 227 :    1048576 : layers.25.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 228 :    1048576 : layers.25.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 229 :    8388608 : layers.25.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 230 :   29360128 : layers.25.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 231 :   29360128 : layers.25.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 232 :   29360128 : layers.25.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 233 :       8192 : layers.25.attention_norm.weight     : [8192]          : torch.bfloat16
 234 :       8192 : layers.25.ffn_norm.weight           : [8192]          : torch.bfloat16
 235 :    8388608 : layers.26.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 236 :    1048576 : layers.26.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 237 :    1048576 : layers.26.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 238 :    8388608 : layers.26.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 239 :   29360128 : layers.26.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 240 :   29360128 : layers.26.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 241 :   29360128 : layers.26.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 242 :       8192 : layers.26.attention_norm.weight     : [8192]          : torch.bfloat16
 243 :       8192 : layers.26.ffn_norm.weight           : [8192]          : torch.bfloat16
 244 :    8388608 : layers.27.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 245 :    1048576 : layers.27.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 246 :    1048576 : layers.27.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 247 :    8388608 : layers.27.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 248 :   29360128 : layers.27.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 249 :   29360128 : layers.27.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 250 :   29360128 : layers.27.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 251 :       8192 : layers.27.attention_norm.weight     : [8192]          : torch.bfloat16
 252 :       8192 : layers.27.ffn_norm.weight           : [8192]          : torch.bfloat16
 253 :    8388608 : layers.28.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 254 :    1048576 : layers.28.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 255 :    1048576 : layers.28.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 256 :    8388608 : layers.28.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 257 :   29360128 : layers.28.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 258 :   29360128 : layers.28.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 259 :   29360128 : layers.28.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 260 :       8192 : layers.28.attention_norm.weight     : [8192]          : torch.bfloat16
 261 :       8192 : layers.28.ffn_norm.weight           : [8192]          : torch.bfloat16
 262 :    8388608 : layers.29.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 263 :    1048576 : layers.29.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 264 :    1048576 : layers.29.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 265 :    8388608 : layers.29.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 266 :   29360128 : layers.29.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 267 :   29360128 : layers.29.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 268 :   29360128 : layers.29.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 269 :       8192 : layers.29.attention_norm.weight     : [8192]          : torch.bfloat16
 270 :       8192 : layers.29.ffn_norm.weight           : [8192]          : torch.bfloat16
 271 :    8388608 : layers.30.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 272 :    1048576 : layers.30.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 273 :    1048576 : layers.30.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 274 :    8388608 : layers.30.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 275 :   29360128 : layers.30.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 276 :   29360128 : layers.30.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 277 :   29360128 : layers.30.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 278 :       8192 : layers.30.attention_norm.weight     : [8192]          : torch.bfloat16
 279 :       8192 : layers.30.ffn_norm.weight           : [8192]          : torch.bfloat16
 280 :    8388608 : layers.31.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 281 :    1048576 : layers.31.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 282 :    1048576 : layers.31.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 283 :    8388608 : layers.31.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 284 :   29360128 : layers.31.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 285 :   29360128 : layers.31.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 286 :   29360128 : layers.31.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 287 :       8192 : layers.31.attention_norm.weight     : [8192]          : torch.bfloat16
 288 :       8192 : layers.31.ffn_norm.weight           : [8192]          : torch.bfloat16
 289 :    8388608 : layers.32.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 290 :    1048576 : layers.32.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 291 :    1048576 : layers.32.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 292 :    8388608 : layers.32.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 293 :   29360128 : layers.32.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 294 :   29360128 : layers.32.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 295 :   29360128 : layers.32.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 296 :       8192 : layers.32.attention_norm.weight     : [8192]          : torch.bfloat16
 297 :       8192 : layers.32.ffn_norm.weight           : [8192]          : torch.bfloat16
 298 :    8388608 : layers.33.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 299 :    1048576 : layers.33.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 300 :    1048576 : layers.33.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 301 :    8388608 : layers.33.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 302 :   29360128 : layers.33.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 303 :   29360128 : layers.33.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 304 :   29360128 : layers.33.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 305 :       8192 : layers.33.attention_norm.weight     : [8192]          : torch.bfloat16
 306 :       8192 : layers.33.ffn_norm.weight           : [8192]          : torch.bfloat16
 307 :    8388608 : layers.34.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 308 :    1048576 : layers.34.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 309 :    1048576 : layers.34.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 310 :    8388608 : layers.34.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 311 :   29360128 : layers.34.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 312 :   29360128 : layers.34.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 313 :   29360128 : layers.34.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 314 :       8192 : layers.34.attention_norm.weight     : [8192]          : torch.bfloat16
 315 :       8192 : layers.34.ffn_norm.weight           : [8192]          : torch.bfloat16
 316 :    8388608 : layers.35.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 317 :    1048576 : layers.35.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 318 :    1048576 : layers.35.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 319 :    8388608 : layers.35.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 320 :   29360128 : layers.35.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 321 :   29360128 : layers.35.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 322 :   29360128 : layers.35.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 323 :       8192 : layers.35.attention_norm.weight     : [8192]          : torch.bfloat16
 324 :       8192 : layers.35.ffn_norm.weight           : [8192]          : torch.bfloat16
 325 :    8388608 : layers.36.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 326 :    1048576 : layers.36.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 327 :    1048576 : layers.36.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 328 :    8388608 : layers.36.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 329 :   29360128 : layers.36.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 330 :   29360128 : layers.36.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 331 :   29360128 : layers.36.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 332 :       8192 : layers.36.attention_norm.weight     : [8192]          : torch.bfloat16
 333 :       8192 : layers.36.ffn_norm.weight           : [8192]          : torch.bfloat16
 334 :    8388608 : layers.37.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 335 :    1048576 : layers.37.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 336 :    1048576 : layers.37.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 337 :    8388608 : layers.37.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 338 :   29360128 : layers.37.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 339 :   29360128 : layers.37.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 340 :   29360128 : layers.37.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 341 :       8192 : layers.37.attention_norm.weight     : [8192]          : torch.bfloat16
 342 :       8192 : layers.37.ffn_norm.weight           : [8192]          : torch.bfloat16
 343 :    8388608 : layers.38.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 344 :    1048576 : layers.38.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 345 :    1048576 : layers.38.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 346 :    8388608 : layers.38.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 347 :   29360128 : layers.38.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 348 :   29360128 : layers.38.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 349 :   29360128 : layers.38.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 350 :       8192 : layers.38.attention_norm.weight     : [8192]          : torch.bfloat16
 351 :       8192 : layers.38.ffn_norm.weight           : [8192]          : torch.bfloat16
 352 :    8388608 : layers.39.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 353 :    1048576 : layers.39.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 354 :    1048576 : layers.39.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 355 :    8388608 : layers.39.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 356 :   29360128 : layers.39.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 357 :   29360128 : layers.39.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 358 :   29360128 : layers.39.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 359 :       8192 : layers.39.attention_norm.weight     : [8192]          : torch.bfloat16
 360 :       8192 : layers.39.ffn_norm.weight           : [8192]          : torch.bfloat16
 361 :    8388608 : layers.40.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 362 :    1048576 : layers.40.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 363 :    1048576 : layers.40.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 364 :    8388608 : layers.40.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 365 :   29360128 : layers.40.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 366 :   29360128 : layers.40.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 367 :   29360128 : layers.40.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 368 :       8192 : layers.40.attention_norm.weight     : [8192]          : torch.bfloat16
 369 :       8192 : layers.40.ffn_norm.weight           : [8192]          : torch.bfloat16
 370 :    8388608 : layers.41.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 371 :    1048576 : layers.41.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 372 :    1048576 : layers.41.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 373 :    8388608 : layers.41.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 374 :   29360128 : layers.41.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 375 :   29360128 : layers.41.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 376 :   29360128 : layers.41.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 377 :       8192 : layers.41.attention_norm.weight     : [8192]          : torch.bfloat16
 378 :       8192 : layers.41.ffn_norm.weight           : [8192]          : torch.bfloat16
 379 :    8388608 : layers.42.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 380 :    1048576 : layers.42.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 381 :    1048576 : layers.42.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 382 :    8388608 : layers.42.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 383 :   29360128 : layers.42.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 384 :   29360128 : layers.42.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 385 :   29360128 : layers.42.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 386 :       8192 : layers.42.attention_norm.weight     : [8192]          : torch.bfloat16
 387 :       8192 : layers.42.ffn_norm.weight           : [8192]          : torch.bfloat16
 388 :    8388608 : layers.43.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 389 :    1048576 : layers.43.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 390 :    1048576 : layers.43.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 391 :    8388608 : layers.43.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 392 :   29360128 : layers.43.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 393 :   29360128 : layers.43.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 394 :   29360128 : layers.43.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 395 :       8192 : layers.43.attention_norm.weight     : [8192]          : torch.bfloat16
 396 :       8192 : layers.43.ffn_norm.weight           : [8192]          : torch.bfloat16
 397 :    8388608 : layers.44.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 398 :    1048576 : layers.44.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 399 :    1048576 : layers.44.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 400 :    8388608 : layers.44.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 401 :   29360128 : layers.44.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 402 :   29360128 : layers.44.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 403 :   29360128 : layers.44.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 404 :       8192 : layers.44.attention_norm.weight     : [8192]          : torch.bfloat16
 405 :       8192 : layers.44.ffn_norm.weight           : [8192]          : torch.bfloat16
 406 :    8388608 : layers.45.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 407 :    1048576 : layers.45.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 408 :    1048576 : layers.45.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 409 :    8388608 : layers.45.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 410 :   29360128 : layers.45.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 411 :   29360128 : layers.45.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 412 :   29360128 : layers.45.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 413 :       8192 : layers.45.attention_norm.weight     : [8192]          : torch.bfloat16
 414 :       8192 : layers.45.ffn_norm.weight           : [8192]          : torch.bfloat16
 415 :    8388608 : layers.46.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 416 :    1048576 : layers.46.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 417 :    1048576 : layers.46.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 418 :    8388608 : layers.46.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 419 :   29360128 : layers.46.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 420 :   29360128 : layers.46.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 421 :   29360128 : layers.46.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 422 :       8192 : layers.46.attention_norm.weight     : [8192]          : torch.bfloat16
 423 :       8192 : layers.46.ffn_norm.weight           : [8192]          : torch.bfloat16
 424 :    8388608 : layers.47.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 425 :    1048576 : layers.47.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 426 :    1048576 : layers.47.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 427 :    8388608 : layers.47.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 428 :   29360128 : layers.47.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 429 :   29360128 : layers.47.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 430 :   29360128 : layers.47.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 431 :       8192 : layers.47.attention_norm.weight     : [8192]          : torch.bfloat16
 432 :       8192 : layers.47.ffn_norm.weight           : [8192]          : torch.bfloat16
 433 :    8388608 : layers.48.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 434 :    1048576 : layers.48.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 435 :    1048576 : layers.48.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 436 :    8388608 : layers.48.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 437 :   29360128 : layers.48.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 438 :   29360128 : layers.48.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 439 :   29360128 : layers.48.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 440 :       8192 : layers.48.attention_norm.weight     : [8192]          : torch.bfloat16
 441 :       8192 : layers.48.ffn_norm.weight           : [8192]          : torch.bfloat16
 442 :    8388608 : layers.49.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 443 :    1048576 : layers.49.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 444 :    1048576 : layers.49.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 445 :    8388608 : layers.49.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 446 :   29360128 : layers.49.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 447 :   29360128 : layers.49.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 448 :   29360128 : layers.49.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 449 :       8192 : layers.49.attention_norm.weight     : [8192]          : torch.bfloat16
 450 :       8192 : layers.49.ffn_norm.weight           : [8192]          : torch.bfloat16
 451 :    8388608 : layers.50.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 452 :    1048576 : layers.50.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 453 :    1048576 : layers.50.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 454 :    8388608 : layers.50.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 455 :   29360128 : layers.50.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 456 :   29360128 : layers.50.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 457 :   29360128 : layers.50.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 458 :       8192 : layers.50.attention_norm.weight     : [8192]          : torch.bfloat16
 459 :       8192 : layers.50.ffn_norm.weight           : [8192]          : torch.bfloat16
 460 :    8388608 : layers.51.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 461 :    1048576 : layers.51.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 462 :    1048576 : layers.51.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 463 :    8388608 : layers.51.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 464 :   29360128 : layers.51.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 465 :   29360128 : layers.51.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 466 :   29360128 : layers.51.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 467 :       8192 : layers.51.attention_norm.weight     : [8192]          : torch.bfloat16
 468 :       8192 : layers.51.ffn_norm.weight           : [8192]          : torch.bfloat16
 469 :    8388608 : layers.52.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 470 :    1048576 : layers.52.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 471 :    1048576 : layers.52.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 472 :    8388608 : layers.52.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 473 :   29360128 : layers.52.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 474 :   29360128 : layers.52.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 475 :   29360128 : layers.52.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 476 :       8192 : layers.52.attention_norm.weight     : [8192]          : torch.bfloat16
 477 :       8192 : layers.52.ffn_norm.weight           : [8192]          : torch.bfloat16
 478 :    8388608 : layers.53.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 479 :    1048576 : layers.53.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 480 :    1048576 : layers.53.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 481 :    8388608 : layers.53.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 482 :   29360128 : layers.53.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 483 :   29360128 : layers.53.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 484 :   29360128 : layers.53.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 485 :       8192 : layers.53.attention_norm.weight     : [8192]          : torch.bfloat16
 486 :       8192 : layers.53.ffn_norm.weight           : [8192]          : torch.bfloat16
 487 :    8388608 : layers.54.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 488 :    1048576 : layers.54.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 489 :    1048576 : layers.54.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 490 :    8388608 : layers.54.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 491 :   29360128 : layers.54.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 492 :   29360128 : layers.54.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 493 :   29360128 : layers.54.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 494 :       8192 : layers.54.attention_norm.weight     : [8192]          : torch.bfloat16
 495 :       8192 : layers.54.ffn_norm.weight           : [8192]          : torch.bfloat16
 496 :    8388608 : layers.55.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 497 :    1048576 : layers.55.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 498 :    1048576 : layers.55.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 499 :    8388608 : layers.55.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 500 :   29360128 : layers.55.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 501 :   29360128 : layers.55.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 502 :   29360128 : layers.55.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 503 :       8192 : layers.55.attention_norm.weight     : [8192]          : torch.bfloat16
 504 :       8192 : layers.55.ffn_norm.weight           : [8192]          : torch.bfloat16
 505 :    8388608 : layers.56.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 506 :    1048576 : layers.56.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 507 :    1048576 : layers.56.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 508 :    8388608 : layers.56.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 509 :   29360128 : layers.56.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 510 :   29360128 : layers.56.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 511 :   29360128 : layers.56.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 512 :       8192 : layers.56.attention_norm.weight     : [8192]          : torch.bfloat16
 513 :       8192 : layers.56.ffn_norm.weight           : [8192]          : torch.bfloat16
 514 :    8388608 : layers.57.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 515 :    1048576 : layers.57.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 516 :    1048576 : layers.57.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 517 :    8388608 : layers.57.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 518 :   29360128 : layers.57.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 519 :   29360128 : layers.57.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 520 :   29360128 : layers.57.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 521 :       8192 : layers.57.attention_norm.weight     : [8192]          : torch.bfloat16
 522 :       8192 : layers.57.ffn_norm.weight           : [8192]          : torch.bfloat16
 523 :    8388608 : layers.58.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 524 :    1048576 : layers.58.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 525 :    1048576 : layers.58.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 526 :    8388608 : layers.58.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 527 :   29360128 : layers.58.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 528 :   29360128 : layers.58.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 529 :   29360128 : layers.58.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 530 :       8192 : layers.58.attention_norm.weight     : [8192]          : torch.bfloat16
 531 :       8192 : layers.58.ffn_norm.weight           : [8192]          : torch.bfloat16
 532 :    8388608 : layers.59.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 533 :    1048576 : layers.59.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 534 :    1048576 : layers.59.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 535 :    8388608 : layers.59.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 536 :   29360128 : layers.59.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 537 :   29360128 : layers.59.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 538 :   29360128 : layers.59.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 539 :       8192 : layers.59.attention_norm.weight     : [8192]          : torch.bfloat16
 540 :       8192 : layers.59.ffn_norm.weight           : [8192]          : torch.bfloat16
 541 :    8388608 : layers.60.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 542 :    1048576 : layers.60.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 543 :    1048576 : layers.60.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 544 :    8388608 : layers.60.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 545 :   29360128 : layers.60.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 546 :   29360128 : layers.60.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 547 :   29360128 : layers.60.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 548 :       8192 : layers.60.attention_norm.weight     : [8192]          : torch.bfloat16
 549 :       8192 : layers.60.ffn_norm.weight           : [8192]          : torch.bfloat16
 550 :    8388608 : layers.61.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 551 :    1048576 : layers.61.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 552 :    1048576 : layers.61.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 553 :    8388608 : layers.61.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 554 :   29360128 : layers.61.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 555 :   29360128 : layers.61.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 556 :   29360128 : layers.61.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 557 :       8192 : layers.61.attention_norm.weight     : [8192]          : torch.bfloat16
 558 :       8192 : layers.61.ffn_norm.weight           : [8192]          : torch.bfloat16
 559 :    8388608 : layers.62.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 560 :    1048576 : layers.62.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 561 :    1048576 : layers.62.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 562 :    8388608 : layers.62.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 563 :   29360128 : layers.62.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 564 :   29360128 : layers.62.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 565 :   29360128 : layers.62.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 566 :       8192 : layers.62.attention_norm.weight     : [8192]          : torch.bfloat16
 567 :       8192 : layers.62.ffn_norm.weight           : [8192]          : torch.bfloat16
 568 :    8388608 : layers.63.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 569 :    1048576 : layers.63.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 570 :    1048576 : layers.63.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 571 :    8388608 : layers.63.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 572 :   29360128 : layers.63.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 573 :   29360128 : layers.63.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 574 :   29360128 : layers.63.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 575 :       8192 : layers.63.attention_norm.weight     : [8192]          : torch.bfloat16
 576 :       8192 : layers.63.ffn_norm.weight           : [8192]          : torch.bfloat16
 577 :    8388608 : layers.64.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 578 :    1048576 : layers.64.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 579 :    1048576 : layers.64.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 580 :    8388608 : layers.64.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 581 :   29360128 : layers.64.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 582 :   29360128 : layers.64.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 583 :   29360128 : layers.64.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 584 :       8192 : layers.64.attention_norm.weight     : [8192]          : torch.bfloat16
 585 :       8192 : layers.64.ffn_norm.weight           : [8192]          : torch.bfloat16
 586 :    8388608 : layers.65.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 587 :    1048576 : layers.65.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 588 :    1048576 : layers.65.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 589 :    8388608 : layers.65.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 590 :   29360128 : layers.65.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 591 :   29360128 : layers.65.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 592 :   29360128 : layers.65.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 593 :       8192 : layers.65.attention_norm.weight     : [8192]          : torch.bfloat16
 594 :       8192 : layers.65.ffn_norm.weight           : [8192]          : torch.bfloat16
 595 :    8388608 : layers.66.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 596 :    1048576 : layers.66.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 597 :    1048576 : layers.66.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 598 :    8388608 : layers.66.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 599 :   29360128 : layers.66.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 600 :   29360128 : layers.66.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 601 :   29360128 : layers.66.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 602 :       8192 : layers.66.attention_norm.weight     : [8192]          : torch.bfloat16
 603 :       8192 : layers.66.ffn_norm.weight           : [8192]          : torch.bfloat16
 604 :    8388608 : layers.67.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 605 :    1048576 : layers.67.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 606 :    1048576 : layers.67.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 607 :    8388608 : layers.67.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 608 :   29360128 : layers.67.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 609 :   29360128 : layers.67.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 610 :   29360128 : layers.67.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 611 :       8192 : layers.67.attention_norm.weight     : [8192]          : torch.bfloat16
 612 :       8192 : layers.67.ffn_norm.weight           : [8192]          : torch.bfloat16
 613 :    8388608 : layers.68.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 614 :    1048576 : layers.68.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 615 :    1048576 : layers.68.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 616 :    8388608 : layers.68.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 617 :   29360128 : layers.68.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 618 :   29360128 : layers.68.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 619 :   29360128 : layers.68.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 620 :       8192 : layers.68.attention_norm.weight     : [8192]          : torch.bfloat16
 621 :       8192 : layers.68.ffn_norm.weight           : [8192]          : torch.bfloat16
 622 :    8388608 : layers.69.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 623 :    1048576 : layers.69.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 624 :    1048576 : layers.69.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 625 :    8388608 : layers.69.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 626 :   29360128 : layers.69.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 627 :   29360128 : layers.69.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 628 :   29360128 : layers.69.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 629 :       8192 : layers.69.attention_norm.weight     : [8192]          : torch.bfloat16
 630 :       8192 : layers.69.ffn_norm.weight           : [8192]          : torch.bfloat16
 631 :    8388608 : layers.70.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 632 :    1048576 : layers.70.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 633 :    1048576 : layers.70.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 634 :    8388608 : layers.70.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 635 :   29360128 : layers.70.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 636 :   29360128 : layers.70.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 637 :   29360128 : layers.70.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 638 :       8192 : layers.70.attention_norm.weight     : [8192]          : torch.bfloat16
 639 :       8192 : layers.70.ffn_norm.weight           : [8192]          : torch.bfloat16
 640 :    8388608 : layers.71.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 641 :    1048576 : layers.71.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 642 :    1048576 : layers.71.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 643 :    8388608 : layers.71.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 644 :   29360128 : layers.71.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 645 :   29360128 : layers.71.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 646 :   29360128 : layers.71.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 647 :       8192 : layers.71.attention_norm.weight     : [8192]          : torch.bfloat16
 648 :       8192 : layers.71.ffn_norm.weight           : [8192]          : torch.bfloat16
 649 :    8388608 : layers.72.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 650 :    1048576 : layers.72.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 651 :    1048576 : layers.72.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 652 :    8388608 : layers.72.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 653 :   29360128 : layers.72.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 654 :   29360128 : layers.72.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 655 :   29360128 : layers.72.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 656 :       8192 : layers.72.attention_norm.weight     : [8192]          : torch.bfloat16
 657 :       8192 : layers.72.ffn_norm.weight           : [8192]          : torch.bfloat16
 658 :    8388608 : layers.73.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 659 :    1048576 : layers.73.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 660 :    1048576 : layers.73.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 661 :    8388608 : layers.73.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 662 :   29360128 : layers.73.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 663 :   29360128 : layers.73.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 664 :   29360128 : layers.73.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 665 :       8192 : layers.73.attention_norm.weight     : [8192]          : torch.bfloat16
 666 :       8192 : layers.73.ffn_norm.weight           : [8192]          : torch.bfloat16
 667 :    8388608 : layers.74.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 668 :    1048576 : layers.74.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 669 :    1048576 : layers.74.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 670 :    8388608 : layers.74.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 671 :   29360128 : layers.74.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 672 :   29360128 : layers.74.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 673 :   29360128 : layers.74.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 674 :       8192 : layers.74.attention_norm.weight     : [8192]          : torch.bfloat16
 675 :       8192 : layers.74.ffn_norm.weight           : [8192]          : torch.bfloat16
 676 :    8388608 : layers.75.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 677 :    1048576 : layers.75.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 678 :    1048576 : layers.75.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 679 :    8388608 : layers.75.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 680 :   29360128 : layers.75.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 681 :   29360128 : layers.75.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 682 :   29360128 : layers.75.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 683 :       8192 : layers.75.attention_norm.weight     : [8192]          : torch.bfloat16
 684 :       8192 : layers.75.ffn_norm.weight           : [8192]          : torch.bfloat16
 685 :    8388608 : layers.76.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 686 :    1048576 : layers.76.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 687 :    1048576 : layers.76.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 688 :    8388608 : layers.76.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 689 :   29360128 : layers.76.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 690 :   29360128 : layers.76.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 691 :   29360128 : layers.76.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 692 :       8192 : layers.76.attention_norm.weight     : [8192]          : torch.bfloat16
 693 :       8192 : layers.76.ffn_norm.weight           : [8192]          : torch.bfloat16
 694 :    8388608 : layers.77.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 695 :    1048576 : layers.77.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 696 :    1048576 : layers.77.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 697 :    8388608 : layers.77.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 698 :   29360128 : layers.77.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 699 :   29360128 : layers.77.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 700 :   29360128 : layers.77.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 701 :       8192 : layers.77.attention_norm.weight     : [8192]          : torch.bfloat16
 702 :       8192 : layers.77.ffn_norm.weight           : [8192]          : torch.bfloat16
 703 :    8388608 : layers.78.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 704 :    1048576 : layers.78.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 705 :    1048576 : layers.78.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 706 :    8388608 : layers.78.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 707 :   29360128 : layers.78.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 708 :   29360128 : layers.78.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 709 :   29360128 : layers.78.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 710 :       8192 : layers.78.attention_norm.weight     : [8192]          : torch.bfloat16
 711 :       8192 : layers.78.ffn_norm.weight           : [8192]          : torch.bfloat16
 712 :    8388608 : layers.79.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 713 :    1048576 : layers.79.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 714 :    1048576 : layers.79.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 715 :    8388608 : layers.79.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 716 :   29360128 : layers.79.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 717 :   29360128 : layers.79.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 718 :   29360128 : layers.79.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 719 :       8192 : layers.79.attention_norm.weight     : [8192]          : torch.bfloat16
 720 :       8192 : layers.79.ffn_norm.weight           : [8192]          : torch.bfloat16
 721 :       8192 : norm.weight                         : [8192]          : torch.bfloat16
 722 :  131334144 : output.weight                       : [16032, 8192]   : torch.bfloat16

-----------------------------------------------------------------------------
61742571520 params in total.
123485143040 bytes in total.
40.41 sec, 4.91 sec, 2913.90 MB/s
-----------------------------------------------------------------------------

[8/8]: Loading original/consolidated.07.pth
   0 :  131334144 : tok_embeddings.weight               : [16032, 8192]   : torch.bfloat16
   1 :    8388608 : layers.0.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
   2 :    1048576 : layers.0.attention.wk.weight        : [128, 8192]     : torch.bfloat16
   3 :    1048576 : layers.0.attention.wv.weight        : [128, 8192]     : torch.bfloat16
   4 :    8388608 : layers.0.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
   5 :   29360128 : layers.0.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
   6 :   29360128 : layers.0.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
   7 :   29360128 : layers.0.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
   8 :       8192 : layers.0.attention_norm.weight      : [8192]          : torch.bfloat16
   9 :       8192 : layers.0.ffn_norm.weight            : [8192]          : torch.bfloat16
  10 :    8388608 : layers.1.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  11 :    1048576 : layers.1.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  12 :    1048576 : layers.1.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  13 :    8388608 : layers.1.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  14 :   29360128 : layers.1.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  15 :   29360128 : layers.1.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  16 :   29360128 : layers.1.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  17 :       8192 : layers.1.attention_norm.weight      : [8192]          : torch.bfloat16
  18 :       8192 : layers.1.ffn_norm.weight            : [8192]          : torch.bfloat16
  19 :    8388608 : layers.2.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  20 :    1048576 : layers.2.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  21 :    1048576 : layers.2.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  22 :    8388608 : layers.2.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  23 :   29360128 : layers.2.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  24 :   29360128 : layers.2.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  25 :   29360128 : layers.2.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  26 :       8192 : layers.2.attention_norm.weight      : [8192]          : torch.bfloat16
  27 :       8192 : layers.2.ffn_norm.weight            : [8192]          : torch.bfloat16
  28 :    8388608 : layers.3.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  29 :    1048576 : layers.3.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  30 :    1048576 : layers.3.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  31 :    8388608 : layers.3.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  32 :   29360128 : layers.3.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  33 :   29360128 : layers.3.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  34 :   29360128 : layers.3.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  35 :       8192 : layers.3.attention_norm.weight      : [8192]          : torch.bfloat16
  36 :       8192 : layers.3.ffn_norm.weight            : [8192]          : torch.bfloat16
  37 :    8388608 : layers.4.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  38 :    1048576 : layers.4.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  39 :    1048576 : layers.4.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  40 :    8388608 : layers.4.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  41 :   29360128 : layers.4.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  42 :   29360128 : layers.4.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  43 :   29360128 : layers.4.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  44 :       8192 : layers.4.attention_norm.weight      : [8192]          : torch.bfloat16
  45 :       8192 : layers.4.ffn_norm.weight            : [8192]          : torch.bfloat16
  46 :    8388608 : layers.5.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  47 :    1048576 : layers.5.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  48 :    1048576 : layers.5.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  49 :    8388608 : layers.5.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  50 :   29360128 : layers.5.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  51 :   29360128 : layers.5.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  52 :   29360128 : layers.5.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  53 :       8192 : layers.5.attention_norm.weight      : [8192]          : torch.bfloat16
  54 :       8192 : layers.5.ffn_norm.weight            : [8192]          : torch.bfloat16
  55 :    8388608 : layers.6.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  56 :    1048576 : layers.6.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  57 :    1048576 : layers.6.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  58 :    8388608 : layers.6.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  59 :   29360128 : layers.6.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  60 :   29360128 : layers.6.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  61 :   29360128 : layers.6.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  62 :       8192 : layers.6.attention_norm.weight      : [8192]          : torch.bfloat16
  63 :       8192 : layers.6.ffn_norm.weight            : [8192]          : torch.bfloat16
  64 :    8388608 : layers.7.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  65 :    1048576 : layers.7.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  66 :    1048576 : layers.7.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  67 :    8388608 : layers.7.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  68 :   29360128 : layers.7.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  69 :   29360128 : layers.7.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  70 :   29360128 : layers.7.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  71 :       8192 : layers.7.attention_norm.weight      : [8192]          : torch.bfloat16
  72 :       8192 : layers.7.ffn_norm.weight            : [8192]          : torch.bfloat16
  73 :    8388608 : layers.8.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  74 :    1048576 : layers.8.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  75 :    1048576 : layers.8.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  76 :    8388608 : layers.8.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  77 :   29360128 : layers.8.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  78 :   29360128 : layers.8.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  79 :   29360128 : layers.8.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  80 :       8192 : layers.8.attention_norm.weight      : [8192]          : torch.bfloat16
  81 :       8192 : layers.8.ffn_norm.weight            : [8192]          : torch.bfloat16
  82 :    8388608 : layers.9.attention.wq.weight        : [1024, 8192]    : torch.bfloat16
  83 :    1048576 : layers.9.attention.wk.weight        : [128, 8192]     : torch.bfloat16
  84 :    1048576 : layers.9.attention.wv.weight        : [128, 8192]     : torch.bfloat16
  85 :    8388608 : layers.9.attention.wo.weight        : [8192, 1024]    : torch.bfloat16
  86 :   29360128 : layers.9.feed_forward.w1.weight     : [3584, 8192]    : torch.bfloat16
  87 :   29360128 : layers.9.feed_forward.w3.weight     : [3584, 8192]    : torch.bfloat16
  88 :   29360128 : layers.9.feed_forward.w2.weight     : [8192, 3584]    : torch.bfloat16
  89 :       8192 : layers.9.attention_norm.weight      : [8192]          : torch.bfloat16
  90 :       8192 : layers.9.ffn_norm.weight            : [8192]          : torch.bfloat16
  91 :    8388608 : layers.10.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
  92 :    1048576 : layers.10.attention.wk.weight       : [128, 8192]     : torch.bfloat16
  93 :    1048576 : layers.10.attention.wv.weight       : [128, 8192]     : torch.bfloat16
  94 :    8388608 : layers.10.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
  95 :   29360128 : layers.10.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
  96 :   29360128 : layers.10.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
  97 :   29360128 : layers.10.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
  98 :       8192 : layers.10.attention_norm.weight     : [8192]          : torch.bfloat16
  99 :       8192 : layers.10.ffn_norm.weight           : [8192]          : torch.bfloat16
 100 :    8388608 : layers.11.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 101 :    1048576 : layers.11.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 102 :    1048576 : layers.11.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 103 :    8388608 : layers.11.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 104 :   29360128 : layers.11.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 105 :   29360128 : layers.11.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 106 :   29360128 : layers.11.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 107 :       8192 : layers.11.attention_norm.weight     : [8192]          : torch.bfloat16
 108 :       8192 : layers.11.ffn_norm.weight           : [8192]          : torch.bfloat16
 109 :    8388608 : layers.12.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 110 :    1048576 : layers.12.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 111 :    1048576 : layers.12.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 112 :    8388608 : layers.12.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 113 :   29360128 : layers.12.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 114 :   29360128 : layers.12.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 115 :   29360128 : layers.12.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 116 :       8192 : layers.12.attention_norm.weight     : [8192]          : torch.bfloat16
 117 :       8192 : layers.12.ffn_norm.weight           : [8192]          : torch.bfloat16
 118 :    8388608 : layers.13.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 119 :    1048576 : layers.13.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 120 :    1048576 : layers.13.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 121 :    8388608 : layers.13.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 122 :   29360128 : layers.13.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 123 :   29360128 : layers.13.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 124 :   29360128 : layers.13.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 125 :       8192 : layers.13.attention_norm.weight     : [8192]          : torch.bfloat16
 126 :       8192 : layers.13.ffn_norm.weight           : [8192]          : torch.bfloat16
 127 :    8388608 : layers.14.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 128 :    1048576 : layers.14.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 129 :    1048576 : layers.14.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 130 :    8388608 : layers.14.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 131 :   29360128 : layers.14.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 132 :   29360128 : layers.14.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 133 :   29360128 : layers.14.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 134 :       8192 : layers.14.attention_norm.weight     : [8192]          : torch.bfloat16
 135 :       8192 : layers.14.ffn_norm.weight           : [8192]          : torch.bfloat16
 136 :    8388608 : layers.15.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 137 :    1048576 : layers.15.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 138 :    1048576 : layers.15.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 139 :    8388608 : layers.15.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 140 :   29360128 : layers.15.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 141 :   29360128 : layers.15.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 142 :   29360128 : layers.15.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 143 :       8192 : layers.15.attention_norm.weight     : [8192]          : torch.bfloat16
 144 :       8192 : layers.15.ffn_norm.weight           : [8192]          : torch.bfloat16
 145 :    8388608 : layers.16.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 146 :    1048576 : layers.16.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 147 :    1048576 : layers.16.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 148 :    8388608 : layers.16.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 149 :   29360128 : layers.16.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 150 :   29360128 : layers.16.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 151 :   29360128 : layers.16.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 152 :       8192 : layers.16.attention_norm.weight     : [8192]          : torch.bfloat16
 153 :       8192 : layers.16.ffn_norm.weight           : [8192]          : torch.bfloat16
 154 :    8388608 : layers.17.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 155 :    1048576 : layers.17.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 156 :    1048576 : layers.17.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 157 :    8388608 : layers.17.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 158 :   29360128 : layers.17.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 159 :   29360128 : layers.17.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 160 :   29360128 : layers.17.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 161 :       8192 : layers.17.attention_norm.weight     : [8192]          : torch.bfloat16
 162 :       8192 : layers.17.ffn_norm.weight           : [8192]          : torch.bfloat16
 163 :    8388608 : layers.18.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 164 :    1048576 : layers.18.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 165 :    1048576 : layers.18.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 166 :    8388608 : layers.18.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 167 :   29360128 : layers.18.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 168 :   29360128 : layers.18.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 169 :   29360128 : layers.18.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 170 :       8192 : layers.18.attention_norm.weight     : [8192]          : torch.bfloat16
 171 :       8192 : layers.18.ffn_norm.weight           : [8192]          : torch.bfloat16
 172 :    8388608 : layers.19.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 173 :    1048576 : layers.19.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 174 :    1048576 : layers.19.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 175 :    8388608 : layers.19.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 176 :   29360128 : layers.19.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 177 :   29360128 : layers.19.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 178 :   29360128 : layers.19.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 179 :       8192 : layers.19.attention_norm.weight     : [8192]          : torch.bfloat16
 180 :       8192 : layers.19.ffn_norm.weight           : [8192]          : torch.bfloat16
 181 :    8388608 : layers.20.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 182 :    1048576 : layers.20.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 183 :    1048576 : layers.20.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 184 :    8388608 : layers.20.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 185 :   29360128 : layers.20.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 186 :   29360128 : layers.20.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 187 :   29360128 : layers.20.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 188 :       8192 : layers.20.attention_norm.weight     : [8192]          : torch.bfloat16
 189 :       8192 : layers.20.ffn_norm.weight           : [8192]          : torch.bfloat16
 190 :    8388608 : layers.21.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 191 :    1048576 : layers.21.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 192 :    1048576 : layers.21.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 193 :    8388608 : layers.21.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 194 :   29360128 : layers.21.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 195 :   29360128 : layers.21.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 196 :   29360128 : layers.21.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 197 :       8192 : layers.21.attention_norm.weight     : [8192]          : torch.bfloat16
 198 :       8192 : layers.21.ffn_norm.weight           : [8192]          : torch.bfloat16
 199 :    8388608 : layers.22.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 200 :    1048576 : layers.22.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 201 :    1048576 : layers.22.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 202 :    8388608 : layers.22.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 203 :   29360128 : layers.22.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 204 :   29360128 : layers.22.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 205 :   29360128 : layers.22.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 206 :       8192 : layers.22.attention_norm.weight     : [8192]          : torch.bfloat16
 207 :       8192 : layers.22.ffn_norm.weight           : [8192]          : torch.bfloat16
 208 :    8388608 : layers.23.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 209 :    1048576 : layers.23.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 210 :    1048576 : layers.23.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 211 :    8388608 : layers.23.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 212 :   29360128 : layers.23.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 213 :   29360128 : layers.23.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 214 :   29360128 : layers.23.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 215 :       8192 : layers.23.attention_norm.weight     : [8192]          : torch.bfloat16
 216 :       8192 : layers.23.ffn_norm.weight           : [8192]          : torch.bfloat16
 217 :    8388608 : layers.24.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 218 :    1048576 : layers.24.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 219 :    1048576 : layers.24.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 220 :    8388608 : layers.24.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 221 :   29360128 : layers.24.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 222 :   29360128 : layers.24.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 223 :   29360128 : layers.24.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 224 :       8192 : layers.24.attention_norm.weight     : [8192]          : torch.bfloat16
 225 :       8192 : layers.24.ffn_norm.weight           : [8192]          : torch.bfloat16
 226 :    8388608 : layers.25.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 227 :    1048576 : layers.25.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 228 :    1048576 : layers.25.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 229 :    8388608 : layers.25.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 230 :   29360128 : layers.25.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 231 :   29360128 : layers.25.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 232 :   29360128 : layers.25.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 233 :       8192 : layers.25.attention_norm.weight     : [8192]          : torch.bfloat16
 234 :       8192 : layers.25.ffn_norm.weight           : [8192]          : torch.bfloat16
 235 :    8388608 : layers.26.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 236 :    1048576 : layers.26.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 237 :    1048576 : layers.26.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 238 :    8388608 : layers.26.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 239 :   29360128 : layers.26.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 240 :   29360128 : layers.26.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 241 :   29360128 : layers.26.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 242 :       8192 : layers.26.attention_norm.weight     : [8192]          : torch.bfloat16
 243 :       8192 : layers.26.ffn_norm.weight           : [8192]          : torch.bfloat16
 244 :    8388608 : layers.27.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 245 :    1048576 : layers.27.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 246 :    1048576 : layers.27.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 247 :    8388608 : layers.27.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 248 :   29360128 : layers.27.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 249 :   29360128 : layers.27.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 250 :   29360128 : layers.27.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 251 :       8192 : layers.27.attention_norm.weight     : [8192]          : torch.bfloat16
 252 :       8192 : layers.27.ffn_norm.weight           : [8192]          : torch.bfloat16
 253 :    8388608 : layers.28.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 254 :    1048576 : layers.28.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 255 :    1048576 : layers.28.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 256 :    8388608 : layers.28.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 257 :   29360128 : layers.28.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 258 :   29360128 : layers.28.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 259 :   29360128 : layers.28.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 260 :       8192 : layers.28.attention_norm.weight     : [8192]          : torch.bfloat16
 261 :       8192 : layers.28.ffn_norm.weight           : [8192]          : torch.bfloat16
 262 :    8388608 : layers.29.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 263 :    1048576 : layers.29.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 264 :    1048576 : layers.29.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 265 :    8388608 : layers.29.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 266 :   29360128 : layers.29.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 267 :   29360128 : layers.29.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 268 :   29360128 : layers.29.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 269 :       8192 : layers.29.attention_norm.weight     : [8192]          : torch.bfloat16
 270 :       8192 : layers.29.ffn_norm.weight           : [8192]          : torch.bfloat16
 271 :    8388608 : layers.30.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 272 :    1048576 : layers.30.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 273 :    1048576 : layers.30.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 274 :    8388608 : layers.30.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 275 :   29360128 : layers.30.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 276 :   29360128 : layers.30.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 277 :   29360128 : layers.30.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 278 :       8192 : layers.30.attention_norm.weight     : [8192]          : torch.bfloat16
 279 :       8192 : layers.30.ffn_norm.weight           : [8192]          : torch.bfloat16
 280 :    8388608 : layers.31.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 281 :    1048576 : layers.31.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 282 :    1048576 : layers.31.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 283 :    8388608 : layers.31.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 284 :   29360128 : layers.31.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 285 :   29360128 : layers.31.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 286 :   29360128 : layers.31.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 287 :       8192 : layers.31.attention_norm.weight     : [8192]          : torch.bfloat16
 288 :       8192 : layers.31.ffn_norm.weight           : [8192]          : torch.bfloat16
 289 :    8388608 : layers.32.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 290 :    1048576 : layers.32.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 291 :    1048576 : layers.32.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 292 :    8388608 : layers.32.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 293 :   29360128 : layers.32.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 294 :   29360128 : layers.32.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 295 :   29360128 : layers.32.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 296 :       8192 : layers.32.attention_norm.weight     : [8192]          : torch.bfloat16
 297 :       8192 : layers.32.ffn_norm.weight           : [8192]          : torch.bfloat16
 298 :    8388608 : layers.33.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 299 :    1048576 : layers.33.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 300 :    1048576 : layers.33.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 301 :    8388608 : layers.33.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 302 :   29360128 : layers.33.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 303 :   29360128 : layers.33.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 304 :   29360128 : layers.33.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 305 :       8192 : layers.33.attention_norm.weight     : [8192]          : torch.bfloat16
 306 :       8192 : layers.33.ffn_norm.weight           : [8192]          : torch.bfloat16
 307 :    8388608 : layers.34.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 308 :    1048576 : layers.34.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 309 :    1048576 : layers.34.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 310 :    8388608 : layers.34.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 311 :   29360128 : layers.34.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 312 :   29360128 : layers.34.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 313 :   29360128 : layers.34.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 314 :       8192 : layers.34.attention_norm.weight     : [8192]          : torch.bfloat16
 315 :       8192 : layers.34.ffn_norm.weight           : [8192]          : torch.bfloat16
 316 :    8388608 : layers.35.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 317 :    1048576 : layers.35.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 318 :    1048576 : layers.35.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 319 :    8388608 : layers.35.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 320 :   29360128 : layers.35.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 321 :   29360128 : layers.35.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 322 :   29360128 : layers.35.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 323 :       8192 : layers.35.attention_norm.weight     : [8192]          : torch.bfloat16
 324 :       8192 : layers.35.ffn_norm.weight           : [8192]          : torch.bfloat16
 325 :    8388608 : layers.36.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 326 :    1048576 : layers.36.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 327 :    1048576 : layers.36.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 328 :    8388608 : layers.36.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 329 :   29360128 : layers.36.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 330 :   29360128 : layers.36.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 331 :   29360128 : layers.36.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 332 :       8192 : layers.36.attention_norm.weight     : [8192]          : torch.bfloat16
 333 :       8192 : layers.36.ffn_norm.weight           : [8192]          : torch.bfloat16
 334 :    8388608 : layers.37.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 335 :    1048576 : layers.37.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 336 :    1048576 : layers.37.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 337 :    8388608 : layers.37.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 338 :   29360128 : layers.37.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 339 :   29360128 : layers.37.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 340 :   29360128 : layers.37.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 341 :       8192 : layers.37.attention_norm.weight     : [8192]          : torch.bfloat16
 342 :       8192 : layers.37.ffn_norm.weight           : [8192]          : torch.bfloat16
 343 :    8388608 : layers.38.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 344 :    1048576 : layers.38.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 345 :    1048576 : layers.38.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 346 :    8388608 : layers.38.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 347 :   29360128 : layers.38.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 348 :   29360128 : layers.38.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 349 :   29360128 : layers.38.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 350 :       8192 : layers.38.attention_norm.weight     : [8192]          : torch.bfloat16
 351 :       8192 : layers.38.ffn_norm.weight           : [8192]          : torch.bfloat16
 352 :    8388608 : layers.39.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 353 :    1048576 : layers.39.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 354 :    1048576 : layers.39.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 355 :    8388608 : layers.39.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 356 :   29360128 : layers.39.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 357 :   29360128 : layers.39.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 358 :   29360128 : layers.39.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 359 :       8192 : layers.39.attention_norm.weight     : [8192]          : torch.bfloat16
 360 :       8192 : layers.39.ffn_norm.weight           : [8192]          : torch.bfloat16
 361 :    8388608 : layers.40.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 362 :    1048576 : layers.40.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 363 :    1048576 : layers.40.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 364 :    8388608 : layers.40.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 365 :   29360128 : layers.40.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 366 :   29360128 : layers.40.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 367 :   29360128 : layers.40.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 368 :       8192 : layers.40.attention_norm.weight     : [8192]          : torch.bfloat16
 369 :       8192 : layers.40.ffn_norm.weight           : [8192]          : torch.bfloat16
 370 :    8388608 : layers.41.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 371 :    1048576 : layers.41.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 372 :    1048576 : layers.41.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 373 :    8388608 : layers.41.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 374 :   29360128 : layers.41.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 375 :   29360128 : layers.41.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 376 :   29360128 : layers.41.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 377 :       8192 : layers.41.attention_norm.weight     : [8192]          : torch.bfloat16
 378 :       8192 : layers.41.ffn_norm.weight           : [8192]          : torch.bfloat16
 379 :    8388608 : layers.42.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 380 :    1048576 : layers.42.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 381 :    1048576 : layers.42.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 382 :    8388608 : layers.42.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 383 :   29360128 : layers.42.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 384 :   29360128 : layers.42.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 385 :   29360128 : layers.42.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 386 :       8192 : layers.42.attention_norm.weight     : [8192]          : torch.bfloat16
 387 :       8192 : layers.42.ffn_norm.weight           : [8192]          : torch.bfloat16
 388 :    8388608 : layers.43.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 389 :    1048576 : layers.43.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 390 :    1048576 : layers.43.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 391 :    8388608 : layers.43.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 392 :   29360128 : layers.43.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 393 :   29360128 : layers.43.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 394 :   29360128 : layers.43.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 395 :       8192 : layers.43.attention_norm.weight     : [8192]          : torch.bfloat16
 396 :       8192 : layers.43.ffn_norm.weight           : [8192]          : torch.bfloat16
 397 :    8388608 : layers.44.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 398 :    1048576 : layers.44.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 399 :    1048576 : layers.44.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 400 :    8388608 : layers.44.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 401 :   29360128 : layers.44.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 402 :   29360128 : layers.44.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 403 :   29360128 : layers.44.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 404 :       8192 : layers.44.attention_norm.weight     : [8192]          : torch.bfloat16
 405 :       8192 : layers.44.ffn_norm.weight           : [8192]          : torch.bfloat16
 406 :    8388608 : layers.45.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 407 :    1048576 : layers.45.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 408 :    1048576 : layers.45.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 409 :    8388608 : layers.45.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 410 :   29360128 : layers.45.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 411 :   29360128 : layers.45.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 412 :   29360128 : layers.45.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 413 :       8192 : layers.45.attention_norm.weight     : [8192]          : torch.bfloat16
 414 :       8192 : layers.45.ffn_norm.weight           : [8192]          : torch.bfloat16
 415 :    8388608 : layers.46.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 416 :    1048576 : layers.46.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 417 :    1048576 : layers.46.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 418 :    8388608 : layers.46.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 419 :   29360128 : layers.46.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 420 :   29360128 : layers.46.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 421 :   29360128 : layers.46.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 422 :       8192 : layers.46.attention_norm.weight     : [8192]          : torch.bfloat16
 423 :       8192 : layers.46.ffn_norm.weight           : [8192]          : torch.bfloat16
 424 :    8388608 : layers.47.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 425 :    1048576 : layers.47.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 426 :    1048576 : layers.47.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 427 :    8388608 : layers.47.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 428 :   29360128 : layers.47.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 429 :   29360128 : layers.47.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 430 :   29360128 : layers.47.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 431 :       8192 : layers.47.attention_norm.weight     : [8192]          : torch.bfloat16
 432 :       8192 : layers.47.ffn_norm.weight           : [8192]          : torch.bfloat16
 433 :    8388608 : layers.48.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 434 :    1048576 : layers.48.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 435 :    1048576 : layers.48.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 436 :    8388608 : layers.48.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 437 :   29360128 : layers.48.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 438 :   29360128 : layers.48.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 439 :   29360128 : layers.48.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 440 :       8192 : layers.48.attention_norm.weight     : [8192]          : torch.bfloat16
 441 :       8192 : layers.48.ffn_norm.weight           : [8192]          : torch.bfloat16
 442 :    8388608 : layers.49.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 443 :    1048576 : layers.49.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 444 :    1048576 : layers.49.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 445 :    8388608 : layers.49.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 446 :   29360128 : layers.49.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 447 :   29360128 : layers.49.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 448 :   29360128 : layers.49.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 449 :       8192 : layers.49.attention_norm.weight     : [8192]          : torch.bfloat16
 450 :       8192 : layers.49.ffn_norm.weight           : [8192]          : torch.bfloat16
 451 :    8388608 : layers.50.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 452 :    1048576 : layers.50.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 453 :    1048576 : layers.50.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 454 :    8388608 : layers.50.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 455 :   29360128 : layers.50.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 456 :   29360128 : layers.50.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 457 :   29360128 : layers.50.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 458 :       8192 : layers.50.attention_norm.weight     : [8192]          : torch.bfloat16
 459 :       8192 : layers.50.ffn_norm.weight           : [8192]          : torch.bfloat16
 460 :    8388608 : layers.51.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 461 :    1048576 : layers.51.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 462 :    1048576 : layers.51.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 463 :    8388608 : layers.51.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 464 :   29360128 : layers.51.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 465 :   29360128 : layers.51.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 466 :   29360128 : layers.51.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 467 :       8192 : layers.51.attention_norm.weight     : [8192]          : torch.bfloat16
 468 :       8192 : layers.51.ffn_norm.weight           : [8192]          : torch.bfloat16
 469 :    8388608 : layers.52.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 470 :    1048576 : layers.52.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 471 :    1048576 : layers.52.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 472 :    8388608 : layers.52.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 473 :   29360128 : layers.52.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 474 :   29360128 : layers.52.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 475 :   29360128 : layers.52.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 476 :       8192 : layers.52.attention_norm.weight     : [8192]          : torch.bfloat16
 477 :       8192 : layers.52.ffn_norm.weight           : [8192]          : torch.bfloat16
 478 :    8388608 : layers.53.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 479 :    1048576 : layers.53.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 480 :    1048576 : layers.53.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 481 :    8388608 : layers.53.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 482 :   29360128 : layers.53.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 483 :   29360128 : layers.53.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 484 :   29360128 : layers.53.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 485 :       8192 : layers.53.attention_norm.weight     : [8192]          : torch.bfloat16
 486 :       8192 : layers.53.ffn_norm.weight           : [8192]          : torch.bfloat16
 487 :    8388608 : layers.54.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 488 :    1048576 : layers.54.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 489 :    1048576 : layers.54.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 490 :    8388608 : layers.54.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 491 :   29360128 : layers.54.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 492 :   29360128 : layers.54.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 493 :   29360128 : layers.54.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 494 :       8192 : layers.54.attention_norm.weight     : [8192]          : torch.bfloat16
 495 :       8192 : layers.54.ffn_norm.weight           : [8192]          : torch.bfloat16
 496 :    8388608 : layers.55.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 497 :    1048576 : layers.55.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 498 :    1048576 : layers.55.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 499 :    8388608 : layers.55.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 500 :   29360128 : layers.55.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 501 :   29360128 : layers.55.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 502 :   29360128 : layers.55.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 503 :       8192 : layers.55.attention_norm.weight     : [8192]          : torch.bfloat16
 504 :       8192 : layers.55.ffn_norm.weight           : [8192]          : torch.bfloat16
 505 :    8388608 : layers.56.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 506 :    1048576 : layers.56.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 507 :    1048576 : layers.56.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 508 :    8388608 : layers.56.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 509 :   29360128 : layers.56.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 510 :   29360128 : layers.56.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 511 :   29360128 : layers.56.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 512 :       8192 : layers.56.attention_norm.weight     : [8192]          : torch.bfloat16
 513 :       8192 : layers.56.ffn_norm.weight           : [8192]          : torch.bfloat16
 514 :    8388608 : layers.57.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 515 :    1048576 : layers.57.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 516 :    1048576 : layers.57.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 517 :    8388608 : layers.57.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 518 :   29360128 : layers.57.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 519 :   29360128 : layers.57.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 520 :   29360128 : layers.57.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 521 :       8192 : layers.57.attention_norm.weight     : [8192]          : torch.bfloat16
 522 :       8192 : layers.57.ffn_norm.weight           : [8192]          : torch.bfloat16
 523 :    8388608 : layers.58.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 524 :    1048576 : layers.58.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 525 :    1048576 : layers.58.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 526 :    8388608 : layers.58.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 527 :   29360128 : layers.58.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 528 :   29360128 : layers.58.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 529 :   29360128 : layers.58.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 530 :       8192 : layers.58.attention_norm.weight     : [8192]          : torch.bfloat16
 531 :       8192 : layers.58.ffn_norm.weight           : [8192]          : torch.bfloat16
 532 :    8388608 : layers.59.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 533 :    1048576 : layers.59.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 534 :    1048576 : layers.59.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 535 :    8388608 : layers.59.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 536 :   29360128 : layers.59.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 537 :   29360128 : layers.59.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 538 :   29360128 : layers.59.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 539 :       8192 : layers.59.attention_norm.weight     : [8192]          : torch.bfloat16
 540 :       8192 : layers.59.ffn_norm.weight           : [8192]          : torch.bfloat16
 541 :    8388608 : layers.60.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 542 :    1048576 : layers.60.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 543 :    1048576 : layers.60.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 544 :    8388608 : layers.60.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 545 :   29360128 : layers.60.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 546 :   29360128 : layers.60.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 547 :   29360128 : layers.60.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 548 :       8192 : layers.60.attention_norm.weight     : [8192]          : torch.bfloat16
 549 :       8192 : layers.60.ffn_norm.weight           : [8192]          : torch.bfloat16
 550 :    8388608 : layers.61.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 551 :    1048576 : layers.61.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 552 :    1048576 : layers.61.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 553 :    8388608 : layers.61.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 554 :   29360128 : layers.61.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 555 :   29360128 : layers.61.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 556 :   29360128 : layers.61.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 557 :       8192 : layers.61.attention_norm.weight     : [8192]          : torch.bfloat16
 558 :       8192 : layers.61.ffn_norm.weight           : [8192]          : torch.bfloat16
 559 :    8388608 : layers.62.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 560 :    1048576 : layers.62.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 561 :    1048576 : layers.62.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 562 :    8388608 : layers.62.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 563 :   29360128 : layers.62.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 564 :   29360128 : layers.62.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 565 :   29360128 : layers.62.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 566 :       8192 : layers.62.attention_norm.weight     : [8192]          : torch.bfloat16
 567 :       8192 : layers.62.ffn_norm.weight           : [8192]          : torch.bfloat16
 568 :    8388608 : layers.63.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 569 :    1048576 : layers.63.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 570 :    1048576 : layers.63.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 571 :    8388608 : layers.63.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 572 :   29360128 : layers.63.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 573 :   29360128 : layers.63.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 574 :   29360128 : layers.63.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 575 :       8192 : layers.63.attention_norm.weight     : [8192]          : torch.bfloat16
 576 :       8192 : layers.63.ffn_norm.weight           : [8192]          : torch.bfloat16
 577 :    8388608 : layers.64.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 578 :    1048576 : layers.64.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 579 :    1048576 : layers.64.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 580 :    8388608 : layers.64.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 581 :   29360128 : layers.64.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 582 :   29360128 : layers.64.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 583 :   29360128 : layers.64.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 584 :       8192 : layers.64.attention_norm.weight     : [8192]          : torch.bfloat16
 585 :       8192 : layers.64.ffn_norm.weight           : [8192]          : torch.bfloat16
 586 :    8388608 : layers.65.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 587 :    1048576 : layers.65.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 588 :    1048576 : layers.65.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 589 :    8388608 : layers.65.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 590 :   29360128 : layers.65.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 591 :   29360128 : layers.65.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 592 :   29360128 : layers.65.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 593 :       8192 : layers.65.attention_norm.weight     : [8192]          : torch.bfloat16
 594 :       8192 : layers.65.ffn_norm.weight           : [8192]          : torch.bfloat16
 595 :    8388608 : layers.66.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 596 :    1048576 : layers.66.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 597 :    1048576 : layers.66.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 598 :    8388608 : layers.66.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 599 :   29360128 : layers.66.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 600 :   29360128 : layers.66.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 601 :   29360128 : layers.66.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 602 :       8192 : layers.66.attention_norm.weight     : [8192]          : torch.bfloat16
 603 :       8192 : layers.66.ffn_norm.weight           : [8192]          : torch.bfloat16
 604 :    8388608 : layers.67.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 605 :    1048576 : layers.67.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 606 :    1048576 : layers.67.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 607 :    8388608 : layers.67.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 608 :   29360128 : layers.67.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 609 :   29360128 : layers.67.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 610 :   29360128 : layers.67.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 611 :       8192 : layers.67.attention_norm.weight     : [8192]          : torch.bfloat16
 612 :       8192 : layers.67.ffn_norm.weight           : [8192]          : torch.bfloat16
 613 :    8388608 : layers.68.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 614 :    1048576 : layers.68.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 615 :    1048576 : layers.68.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 616 :    8388608 : layers.68.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 617 :   29360128 : layers.68.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 618 :   29360128 : layers.68.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 619 :   29360128 : layers.68.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 620 :       8192 : layers.68.attention_norm.weight     : [8192]          : torch.bfloat16
 621 :       8192 : layers.68.ffn_norm.weight           : [8192]          : torch.bfloat16
 622 :    8388608 : layers.69.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 623 :    1048576 : layers.69.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 624 :    1048576 : layers.69.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 625 :    8388608 : layers.69.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 626 :   29360128 : layers.69.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 627 :   29360128 : layers.69.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 628 :   29360128 : layers.69.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 629 :       8192 : layers.69.attention_norm.weight     : [8192]          : torch.bfloat16
 630 :       8192 : layers.69.ffn_norm.weight           : [8192]          : torch.bfloat16
 631 :    8388608 : layers.70.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 632 :    1048576 : layers.70.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 633 :    1048576 : layers.70.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 634 :    8388608 : layers.70.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 635 :   29360128 : layers.70.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 636 :   29360128 : layers.70.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 637 :   29360128 : layers.70.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 638 :       8192 : layers.70.attention_norm.weight     : [8192]          : torch.bfloat16
 639 :       8192 : layers.70.ffn_norm.weight           : [8192]          : torch.bfloat16
 640 :    8388608 : layers.71.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 641 :    1048576 : layers.71.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 642 :    1048576 : layers.71.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 643 :    8388608 : layers.71.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 644 :   29360128 : layers.71.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 645 :   29360128 : layers.71.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 646 :   29360128 : layers.71.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 647 :       8192 : layers.71.attention_norm.weight     : [8192]          : torch.bfloat16
 648 :       8192 : layers.71.ffn_norm.weight           : [8192]          : torch.bfloat16
 649 :    8388608 : layers.72.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 650 :    1048576 : layers.72.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 651 :    1048576 : layers.72.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 652 :    8388608 : layers.72.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 653 :   29360128 : layers.72.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 654 :   29360128 : layers.72.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 655 :   29360128 : layers.72.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 656 :       8192 : layers.72.attention_norm.weight     : [8192]          : torch.bfloat16
 657 :       8192 : layers.72.ffn_norm.weight           : [8192]          : torch.bfloat16
 658 :    8388608 : layers.73.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 659 :    1048576 : layers.73.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 660 :    1048576 : layers.73.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 661 :    8388608 : layers.73.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 662 :   29360128 : layers.73.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 663 :   29360128 : layers.73.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 664 :   29360128 : layers.73.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 665 :       8192 : layers.73.attention_norm.weight     : [8192]          : torch.bfloat16
 666 :       8192 : layers.73.ffn_norm.weight           : [8192]          : torch.bfloat16
 667 :    8388608 : layers.74.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 668 :    1048576 : layers.74.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 669 :    1048576 : layers.74.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 670 :    8388608 : layers.74.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 671 :   29360128 : layers.74.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 672 :   29360128 : layers.74.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 673 :   29360128 : layers.74.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 674 :       8192 : layers.74.attention_norm.weight     : [8192]          : torch.bfloat16
 675 :       8192 : layers.74.ffn_norm.weight           : [8192]          : torch.bfloat16
 676 :    8388608 : layers.75.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 677 :    1048576 : layers.75.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 678 :    1048576 : layers.75.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 679 :    8388608 : layers.75.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 680 :   29360128 : layers.75.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 681 :   29360128 : layers.75.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 682 :   29360128 : layers.75.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 683 :       8192 : layers.75.attention_norm.weight     : [8192]          : torch.bfloat16
 684 :       8192 : layers.75.ffn_norm.weight           : [8192]          : torch.bfloat16
 685 :    8388608 : layers.76.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 686 :    1048576 : layers.76.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 687 :    1048576 : layers.76.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 688 :    8388608 : layers.76.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 689 :   29360128 : layers.76.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 690 :   29360128 : layers.76.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 691 :   29360128 : layers.76.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 692 :       8192 : layers.76.attention_norm.weight     : [8192]          : torch.bfloat16
 693 :       8192 : layers.76.ffn_norm.weight           : [8192]          : torch.bfloat16
 694 :    8388608 : layers.77.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 695 :    1048576 : layers.77.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 696 :    1048576 : layers.77.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 697 :    8388608 : layers.77.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 698 :   29360128 : layers.77.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 699 :   29360128 : layers.77.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 700 :   29360128 : layers.77.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 701 :       8192 : layers.77.attention_norm.weight     : [8192]          : torch.bfloat16
 702 :       8192 : layers.77.ffn_norm.weight           : [8192]          : torch.bfloat16
 703 :    8388608 : layers.78.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 704 :    1048576 : layers.78.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 705 :    1048576 : layers.78.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 706 :    8388608 : layers.78.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 707 :   29360128 : layers.78.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 708 :   29360128 : layers.78.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 709 :   29360128 : layers.78.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 710 :       8192 : layers.78.attention_norm.weight     : [8192]          : torch.bfloat16
 711 :       8192 : layers.78.ffn_norm.weight           : [8192]          : torch.bfloat16
 712 :    8388608 : layers.79.attention.wq.weight       : [1024, 8192]    : torch.bfloat16
 713 :    1048576 : layers.79.attention.wk.weight       : [128, 8192]     : torch.bfloat16
 714 :    1048576 : layers.79.attention.wv.weight       : [128, 8192]     : torch.bfloat16
 715 :    8388608 : layers.79.attention.wo.weight       : [8192, 1024]    : torch.bfloat16
 716 :   29360128 : layers.79.feed_forward.w1.weight    : [3584, 8192]    : torch.bfloat16
 717 :   29360128 : layers.79.feed_forward.w3.weight    : [3584, 8192]    : torch.bfloat16
 718 :   29360128 : layers.79.feed_forward.w2.weight    : [8192, 3584]    : torch.bfloat16
 719 :       8192 : layers.79.attention_norm.weight     : [8192]          : torch.bfloat16
 720 :       8192 : layers.79.ffn_norm.weight           : [8192]          : torch.bfloat16
 721 :       8192 : norm.weight                         : [8192]          : torch.bfloat16
 722 :  131334144 : output.weight                       : [16032, 8192]   : torch.bfloat16
Total number of parameters: 70562938880
70.5629 B
141125877760 Bytes
131.4337 GB
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no library tag.