diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors deleted file mode 100644 index 1d3e70ca713abdaa4ccd73e199da5f16c9d1ace3..0000000000000000000000000000000000000000 --- a/model-00001-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:86535eb0b148ab1dea0ddc62a5c4de9a96a2bd66930f4f1b33fe3b697c2dcecf -size 10000012352 diff --git a/model-00001-of-00066.safetensors b/model-00001-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c712192ab5cbf42fa97bdb573631cc1cc6bf8a9c --- /dev/null +++ b/model-00001-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990f5d957ecd91f3ab36613c6ca76860940b4152c335b3c482127bea6df0e84a +size 517996688 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors deleted file mode 100644 index c30cfc56416a8b261499c48f34b8826142739d8d..0000000000000000000000000000000000000000 --- a/model-00002-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fdd792c0d520810d519424f22f6cb763bc47d604d22a17580f1998e5751124ee -size 9997403496 diff --git a/model-00002-of-00066.safetensors b/model-00002-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d1ad27f72f573a6d290d07a093eeb3c3fa551f3 --- /dev/null +++ b/model-00002-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb4800cf733a706ed2016e047c5d31e38eac058c7a6300e5258b1e141345a01b +size 117576 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors deleted file mode 100644 index 1795c637c7c9799459638c9bcd5ad322d4028e75..0000000000000000000000000000000000000000 --- a/model-00003-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c33d282892a77510442f1c1d947d4a35ad7cddcc7c12a74eb65a27b5c5c7c20c -size 9995576736 diff --git a/model-00003-of-00066.safetensors b/model-00003-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bbfc14d6faec3fbd2ee192f2e225ff958a5d885 --- /dev/null +++ b/model-00003-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f5200e79a7eff98d56cffb8686df8b1c3f0dd4b2c7f1d6a0115b5b5e635693 +size 352325048 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors deleted file mode 100644 index 908aa190b9c8ab190f8521a1208f0fe68c3c0e1c..0000000000000000000000000000000000000000 --- a/model-00004-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d2e405f57ea9860d869a91898ecb9d759543a3ed1b564a862926a94de6cdf56 -size 3611653272 diff --git a/model-00004-of-00066.safetensors b/model-00004-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2654e7b686612f3de375c88fa070f0af5af1474a --- /dev/null +++ b/model-00004-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d49b5e7402990dc8964bb5e9dceca50e2ec8eff3553b1de1007a539853725 +size 234884248 diff --git a/model-00005-of-00066.safetensors b/model-00005-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22a0c32004749e40c03883619f3935f8fd55cce1 --- /dev/null +++ b/model-00005-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29db88c6b23744044346f879a01a5b6f749bad6912e7e554f1c87b41b1059535 +size 117832 diff --git a/model-00006-of-00066.safetensors b/model-00006-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..992cc2356f53c50b29ee138504cbcef46efc4591 --- /dev/null +++ b/model-00006-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e189042a7fb8b087b62c0813957bb6fb3e3625b5927a320debd61e30a8772de +size 645929928 diff --git a/model-00007-of-00066.safetensors b/model-00007-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad1176b42dc9e9acb42d8a94b75007c5b38c653e --- /dev/null +++ b/model-00007-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67875f85e665170b9bf3afd0305da5e25d0bf5295825b5c2ad2426571c0ebd8c +size 322965000 diff --git a/model-00008-of-00066.safetensors b/model-00008-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9eb2999b815be0d0f5a26896b49df1235bf35c40 --- /dev/null +++ b/model-00008-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3c18fc4e6f22416608a0044255d887756f9f386ff709975ad12505f04a20a0 +size 7342912 diff --git a/model-00009-of-00066.safetensors b/model-00009-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1dc93758787e26293817cab5a7b98f6a2926ef8f --- /dev/null +++ b/model-00009-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a055f8e04b4aa22b45a0c6a2fab799c95ccf9ab5e59dfcfeba19ddaab58e1c1 +size 738213096 diff --git a/model-00010-of-00066.safetensors b/model-00010-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed1487d8c623223f93b3c7185a680128cced6fd9 --- /dev/null +++ b/model-00010-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d532d89606f48cb378b4c5d699b7e3f9eff7da8160d0fd999647e0c5581267ec +size 738213096 diff --git a/model-00011-of-00066.safetensors b/model-00011-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7e50aa58b5e389ba748a74e127e5e1bc18e6646 --- /dev/null +++ b/model-00011-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e32b2aa9ee614f2b4582d2ad63944a8a7ee30823c268e85ab71e54d4f2e8c0 +size 738213096 diff --git a/model-00012-of-00066.safetensors b/model-00012-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2d77405a0db37df00b710ff4fd96dd707d5f60e --- /dev/null +++ b/model-00012-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca15969fbc6754b1089e228188b8d62d557bf82a38c8fa76dad5d504fcf3e9d6 +size 738213096 diff --git a/model-00013-of-00066.safetensors b/model-00013-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5476b7ad96e494d76cbcb2803873d48db81b6bbc --- /dev/null +++ b/model-00013-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700b874bc1111ae15ff14b5a592ce5c6f90591ee544039631e23565319be13eb +size 738213096 diff --git a/model-00014-of-00066.safetensors b/model-00014-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a66c71b543879f73ff085af135a64b43a26b003 --- /dev/null +++ b/model-00014-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bab7ac3d9aec77c5bf276df81da21de14ee6ef7637eb091322cf578e17d36af8 +size 738213096 diff --git a/model-00015-of-00066.safetensors b/model-00015-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb29cc6b4adc4509bc3e7e716e761e069fb2d428 --- /dev/null +++ b/model-00015-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68184674f7bc58b23890a7c061d819b721cc8eddd4e59fc4690ba5f582ff755 +size 738213096 diff --git a/model-00016-of-00066.safetensors b/model-00016-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..543bf2e2dd887275417c0c6d8840dc3d3e172b4b --- /dev/null +++ b/model-00016-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab78da826ddfc903a5082221ce6bc4042724576d9e4e7f2d1eaf80d2f32d0cb +size 738213096 diff --git a/model-00017-of-00066.safetensors b/model-00017-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb6b972c6d1a3542819c37934bc9250410a62cfe --- /dev/null +++ b/model-00017-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fcb262d6582dfa511313c1c2004be3d429c95e59d3961e80a31a0cc92dcf537 +size 738213096 diff --git a/model-00018-of-00066.safetensors b/model-00018-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f177b1bceb99102663c35b1276524aa5a1f1230 --- /dev/null +++ b/model-00018-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4494e04181f467c232eb5c2a14ac36c89c2575bfa40ec847dcee30c8540999 +size 738213096 diff --git a/model-00019-of-00066.safetensors b/model-00019-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b07b0dd31f2a602dbe0179dd4a912d9d307f93e --- /dev/null +++ b/model-00019-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b83573eb70b9b0809a24cfdd33166e7fdd5171be2d2ed2a14f1104473888db9 +size 738213224 diff --git a/model-00020-of-00066.safetensors b/model-00020-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7ef62f2f4c72f9b8318b78cc138f6cddda9e5a9 --- /dev/null +++ b/model-00020-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2dd8efe8ce6b05e322608ea66fe661038a378e7abb5fb31adb5ca97f95f9ba3 +size 738213224 diff --git a/model-00021-of-00066.safetensors b/model-00021-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12a6c8fe4322f6f4fd3d4a1c20a0b7fc79df0801 --- /dev/null +++ b/model-00021-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0e4e435528aa7b744b789b34e0454d6b87eb298cd3c992e79c5dd6bafb5a9c +size 738213224 diff --git a/model-00022-of-00066.safetensors b/model-00022-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..113adf1a9e86c334fc009657be107e01f49d5abd --- /dev/null +++ b/model-00022-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f9d1ef9d465d58357c694489b173617fb1e3b279610399545ee5534de23b9f +size 738213224 diff --git a/model-00023-of-00066.safetensors b/model-00023-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74a335c2aed4f51c9e6dd8e4c4ae8a184679a4c1 --- /dev/null +++ b/model-00023-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ddbf8ab7c1ea10a8fae4fc28e03a43081c827ac56955edf3bda59f7d8502ce6 +size 738213224 diff --git a/model-00024-of-00066.safetensors b/model-00024-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8185b3c3cd1135ca4ed55517c59f1c2055c45d01 --- /dev/null +++ b/model-00024-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170841dfab71106f6db87dda3ec223e065422f353d3a6565d1696159a4f8feef +size 738213224 diff --git a/model-00025-of-00066.safetensors b/model-00025-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29cfe37db1b12a2b180c9a0a3f3a021c9593beeb --- /dev/null +++ b/model-00025-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be25c95a1bc33cbd2245a1e5b1c883249a04360ee9d773d8ca5adbd4fc577747 +size 738213224 diff --git a/model-00026-of-00066.safetensors b/model-00026-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e78a432b58b6b2f1a9bf85c4403bea82744a10b9 --- /dev/null +++ b/model-00026-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4548c009c7859d85bc1faa7b5c364aa0a0915847a42914edc56159dc462196f1 +size 738213224 diff --git a/model-00027-of-00066.safetensors b/model-00027-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d597494770fd60e3f287c3177021f7fdc74de82b --- /dev/null +++ b/model-00027-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb0b9565e4d0450ee19384403dd6818c3a3f22f99a563f4db5f7e3612dbe4c5 +size 738213224 diff --git a/model-00028-of-00066.safetensors b/model-00028-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6adbbb7b8d704e9d30e120a35a2689af35d576a --- /dev/null +++ b/model-00028-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c573de5531b37150d55ed7d48a3da06c1b691df2ea90d472b7d6ea222549b52 +size 738213224 diff --git a/model-00029-of-00066.safetensors b/model-00029-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbe339f368a2af5ecfe380608778d079a659794e --- /dev/null +++ b/model-00029-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc4fb2cd8f1ad3fab7b9b9607edc9ee7c4ba5f918571261452563f83bc86450 +size 738213224 diff --git a/model-00030-of-00066.safetensors b/model-00030-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..924c6947186f5e25f54d7d294f0dcac24ffdf7e9 --- /dev/null +++ b/model-00030-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298be0af88cdbf377e9e5f8e8c2e2de09b9fe4f5fee802d75f6397da2ad1fc94 +size 738213224 diff --git a/model-00031-of-00066.safetensors b/model-00031-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44a43d389953f9f3034cfb818e841034bf02d152 --- /dev/null +++ b/model-00031-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:831d1fb9a21efd660405c87e3d2c0c11f0a97b8d195e4905f73befdc5ccac956 +size 738213224 diff --git a/model-00032-of-00066.safetensors b/model-00032-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad086ba2adb141e048da5d21b929d90f5be667b0 --- /dev/null +++ b/model-00032-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695dd48faaa77ae98a926ddf89ec52f20fedb4ee8005684e07f62479fdac3770 +size 738213224 diff --git a/model-00033-of-00066.safetensors b/model-00033-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..641d18b9a439fe752370c56b2447c528017c79a6 --- /dev/null +++ b/model-00033-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed68d3cf4ed28a38bfa1807b5aaeff2ef6aec911801c89fe501eee769abc865 +size 738213224 diff --git a/model-00034-of-00066.safetensors b/model-00034-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98b7a37e88eefd2f33e9f4d4bf34b74ad903363f --- /dev/null +++ b/model-00034-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08320db5f17520a4e0fcdd308d7914c36753e046295421cbd02aa2a550a7dd47 +size 738213224 diff --git a/model-00035-of-00066.safetensors b/model-00035-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f7521ba9c21ed01db3a7e2949f639a6588f82df --- /dev/null +++ b/model-00035-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:954050e5640d9e362b41422875331c5b4d7c8142c416fb8cfccb58359fba8c21 +size 738213224 diff --git a/model-00036-of-00066.safetensors b/model-00036-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d55b936588730ba3f4a1603d671db6f021d9c0b --- /dev/null +++ b/model-00036-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e63c5acc82bb62c76e3b768f06729b11999a2490049b39a09b9622c30aac9a +size 738213224 diff --git a/model-00037-of-00066.safetensors b/model-00037-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76265769358d690dc8ce27545514d2f66d3b1f62 --- /dev/null +++ b/model-00037-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4270970344da76ef5a9f4f84ced7f70df39ee2ba7ab27c9abf52283bef6dfdea +size 369106616 diff --git a/model-00038-of-00066.safetensors b/model-00038-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84ac96c73fb551923ab64a82a236f663254859f2 --- /dev/null +++ b/model-00038-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b78ac13b348b848e3c85778203299dd1111ff48fbf2ce504827e7a2b629b6327 +size 369106616 diff --git a/model-00039-of-00066.safetensors b/model-00039-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a33bb2f59c5d532c278e02653db2595b5c941fc --- /dev/null +++ b/model-00039-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90c779a031bee3e0674014704c902e748768b1f1ddcfce6202d31c70804aa06 +size 369106616 diff --git a/model-00040-of-00066.safetensors b/model-00040-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..458d2969790a4c47275e345851d845329ea3c516 --- /dev/null +++ b/model-00040-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:789c203ab2f58e0cb56df63b14ad0aaab00fd177ff1505b63c77c0dfe1c3d2a6 +size 369106616 diff --git a/model-00041-of-00066.safetensors b/model-00041-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df4c19ef81fc1e3e44389fd7f0a1dab6bee36ca4 --- /dev/null +++ b/model-00041-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dda1bafa925026396476477c267f5ce1d23012f7697822d35f446135faf7f21d +size 369106616 diff --git a/model-00042-of-00066.safetensors b/model-00042-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a69a4bb3735bd582f216b9183ac0d1c3e53aee4b --- /dev/null +++ b/model-00042-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b338d53bc92657049eedd20e0d6549bc5dc40473a5a7a476aed6cb2de5afb38a +size 369106616 diff --git a/model-00043-of-00066.safetensors b/model-00043-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc8a68018a53e204ce8e2ef122031b7276568ee5 --- /dev/null +++ b/model-00043-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:364cdae1de7356b8fc03658eefd15280ced910f978f6578998ecfe44084d4792 +size 369106616 diff --git a/model-00044-of-00066.safetensors b/model-00044-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36e3754757673eecf7835cc8367bf71fe62d8a1f --- /dev/null +++ b/model-00044-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986ed701ef3e167ee2d4551cf3cd312adff81fd20ec6d085dfc7934abdb9f7c1 +size 369106616 diff --git a/model-00045-of-00066.safetensors b/model-00045-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..342011da6587bf0d52c508b01de68b2c5aed9e74 --- /dev/null +++ b/model-00045-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b8d8ff1c0341e5cc7aec12f276ac33b396c24646f4f164f4b0dc9c41f220abd +size 369106616 diff --git a/model-00046-of-00066.safetensors b/model-00046-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62a944b153f69959b96b55ac23105461d095dd6d --- /dev/null +++ b/model-00046-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9c7330a36078b1de4438a53276ae41973be9a22db9effd130718e2970843409 +size 369106616 diff --git a/model-00047-of-00066.safetensors b/model-00047-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb037775792572ecea71084f11fa09512d61d1f5 --- /dev/null +++ b/model-00047-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982b5e752bbda14e4a496e7d140fbed0019ff5ca30815d77c98a8a50f5a8bcd9 +size 369106680 diff --git a/model-00048-of-00066.safetensors b/model-00048-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4582893ee4acfb07d1259337a22771cfcec23f28 --- /dev/null +++ b/model-00048-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71690d24c5cb5a78325ec897b0d7d06e3b6540fb6534c1d65438acf4963ce968 +size 369106680 diff --git a/model-00049-of-00066.safetensors b/model-00049-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53c9c7fda1d9893a92a13d5f2f220f5676f8a493 --- /dev/null +++ b/model-00049-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfab6962a7192475f107122d07d42cd77b2dde4ee7786df292bc947b72f829ee +size 369106680 diff --git a/model-00050-of-00066.safetensors b/model-00050-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd7baa061b3f3d94cf31aa8c9af21ce599deb74f --- /dev/null +++ b/model-00050-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52015059d8caddc1eaebaae847b3892f147d203d7ef93d87d5af6bae2d6de78 +size 369106680 diff --git a/model-00051-of-00066.safetensors b/model-00051-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0bddcccdb507dad63244aa74fd809b0af954fec7 --- /dev/null +++ b/model-00051-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:691647afb27304520107ab8019785255860c85284ded1cc4ef4494299df01255 +size 369106680 diff --git a/model-00052-of-00066.safetensors b/model-00052-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c491e2fabf139c162f977af456cd578ccbbde393 --- /dev/null +++ b/model-00052-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd1a92cc21c09105d151330e236fa2a7c3e71523cd6c2a89898ef0bb5f59cc64 +size 369106680 diff --git a/model-00053-of-00066.safetensors b/model-00053-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..704c75222a784618740f37458918a11cbab521b0 --- /dev/null +++ b/model-00053-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27cc045436b1d75922ddcc747470be5a9a4a905d89cfc8e0156f8631977209da +size 369106680 diff --git a/model-00054-of-00066.safetensors b/model-00054-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66b31e3976e93a377fdb8b81aac706d999c7816e --- /dev/null +++ b/model-00054-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a4ad6b3d7781f0f157c94259f42059a08903b9c84086c897b2972335d1e75c4 +size 369106680 diff --git a/model-00055-of-00066.safetensors b/model-00055-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a9e94be3954ed7513b85e9e39fe05f0a0587d30 --- /dev/null +++ b/model-00055-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a117ed6ae45229f1412534169d4593d4c9be4c1d5490c60a6162a40683db7e8 +size 369106680 diff --git a/model-00056-of-00066.safetensors b/model-00056-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..284e0f69ecc1a3ca3ac16d73cf9a7116e4099442 --- /dev/null +++ b/model-00056-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81471a1ebe699b1442c5e3a139b089819c19d5a8782d5ed488920349610940d7 +size 369106680 diff --git a/model-00057-of-00066.safetensors b/model-00057-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66b85ae5c43034d210fa619e9547cdfb4f5ac0d7 --- /dev/null +++ b/model-00057-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6780c6ed1090f8f00dba9b1cacea609fad9d43a0c205a502d807d65f37d2280 +size 369106680 diff --git a/model-00058-of-00066.safetensors b/model-00058-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b90224e995df42817e050196ad74e3907f2116db --- /dev/null +++ b/model-00058-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e85ed17c368fbdeb9a9de236b63f3be580859d12d5b1009f96de2ad5716ebcd +size 369106680 diff --git a/model-00059-of-00066.safetensors b/model-00059-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79db2d7e056fa4580696e88c203b49bca095ae18 --- /dev/null +++ b/model-00059-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e68114a190c2457f29c5f2d1f11d925b44d434587ed4bbe65650978c45d6568 +size 369106680 diff --git a/model-00060-of-00066.safetensors b/model-00060-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30b115654b6fcee30f41d0fdfb65199dd8e2bb27 --- /dev/null +++ b/model-00060-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1403c1da91d5c04c6f63a6bf0814a00cd270ea2be5e829dfa221c45c2887dd +size 369106680 diff --git a/model-00061-of-00066.safetensors b/model-00061-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c923e9ca525c44bb73f001477fdc347d49fef02 --- /dev/null +++ b/model-00061-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f994bd65b435a4ceb8f3fd1bc597200448e2448bf2251628e4215e6c875ecef0 +size 369106680 diff --git a/model-00062-of-00066.safetensors b/model-00062-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91793aa5947516e0c73b161197d2586411b5a428 --- /dev/null +++ b/model-00062-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d6e101d8e8e31a5b6a21b9a8bed7f654338f813b6105c62da27adc579988a4c +size 369106680 diff --git a/model-00063-of-00066.safetensors b/model-00063-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15481dbcde83e1f8d8b19992cc3ad745a1fcf150 --- /dev/null +++ b/model-00063-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20897935d7d2f0c92a3ebbfed71f8425ef8a764c2766c314ca98ae965c319da6 +size 369106680 diff --git a/model-00064-of-00066.safetensors b/model-00064-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5444696ce0b26ddf11eb390809626735adaa4d57 --- /dev/null +++ b/model-00064-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505ed846eab1cf112ada7d94a0b8d5a0e3e304c5b633a35cfaebc86e982b73f8 +size 369106680 diff --git a/model-00065-of-00066.safetensors b/model-00065-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e73b838efafe1cd4e49a95dd4e90c303c51d2e67 --- /dev/null +++ b/model-00065-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc1a8edbab6053d24dec4ce1c45e641c76250adc27102b1ae1fb4dd1343d2028 +size 4216 diff --git a/model-00066-of-00066.safetensors b/model-00066-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c26da2503b17290425fd871ae7163ad61ade3c81 --- /dev/null +++ b/model-00066-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a719579d10094e1dd02d3dc29046c1fb01ca49866fe7be6b173df87d176e65ef +size 517996672 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 6702a626b3123799dc4ed159482c11043833744e..0136126df3c9b74c2ea12ba65518844c9f6f44c8 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -3,5608 +3,5608 @@ "total_size": 33603948544 }, "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.layers.0.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.0.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.10.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.10.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.10.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.11.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.11.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.11.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.12.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.12.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.12.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.13.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.13.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.13.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.14.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.14.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.14.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.15.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.15.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.15.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.16.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.16.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.16.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.17.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.17.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.17.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.18.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.18.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.18.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.19.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.19.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.19.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.20.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.20.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.20.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.21.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.21.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.21.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.22.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.22.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.22.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.23.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.23.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.23.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.24.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.24.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.24.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.25.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.25.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.25.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.26.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.26.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.26.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.27.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.27.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.27.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.28.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.28.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.28.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.29.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.29.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.29.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.30.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.30.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.30.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.31.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.31.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.31.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.32.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.32.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.32.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.33.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.33.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.33.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.34.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.34.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.34.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.35.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.35.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.35.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.36.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.36.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.36.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.37.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.37.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.38.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.38.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.38.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.39.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.39.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.39.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.40.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.40.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.40.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.41.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.41.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.41.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.42.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.42.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.42.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.43.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.43.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.43.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.44.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.44.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.44.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.45.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.45.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.45.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.46.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.46.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.46.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.47.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.47.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.47.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.48.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.48.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.48.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.49.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.49.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.49.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.50.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.50.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.50.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.51.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.51.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.51.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.52.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.52.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.52.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.53.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.53.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.53.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.54.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.54.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.54.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.55.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.55.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.55.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.56.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.56.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.56.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.57.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.57.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.57.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.58.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.58.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.58.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.59.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.59.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.59.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.60.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.60.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.60.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.61.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.61.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.61.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.62.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.62.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.62.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.63.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.63.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.63.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.9.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.9.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.experts.9.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.shared_experts.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.shared_experts.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.shared_experts.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.1.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.10.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.10.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.10.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.11.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.11.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.11.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.12.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.12.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.12.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.13.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.13.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.13.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.14.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.14.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.14.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.15.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.15.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.15.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.16.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.16.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.16.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.17.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.17.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.17.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.18.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.18.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.18.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.19.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.19.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.19.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.20.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.20.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.20.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.21.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.21.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.21.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.22.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.22.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.22.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.23.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.23.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.23.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.24.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.24.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.24.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.25.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.25.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.25.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.26.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.26.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.26.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.27.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.27.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.27.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.28.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.28.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.28.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.29.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.29.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.29.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.30.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.30.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.30.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.31.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.31.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.31.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.32.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.32.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.32.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.33.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.33.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.33.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.34.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.34.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.34.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.35.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.35.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.35.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.36.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.36.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.36.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.37.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.37.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.38.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.38.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.38.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.39.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.39.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.39.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.40.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.40.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.40.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.41.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.41.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.41.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.42.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.42.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.42.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.43.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.43.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.43.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.44.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.44.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.44.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.45.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.45.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.45.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.46.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.46.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.46.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.47.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.47.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.47.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.48.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.48.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.48.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.49.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.49.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.49.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.50.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.50.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.50.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.51.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.51.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.51.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.52.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.52.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.52.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.53.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.53.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.53.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.54.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.54.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.54.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.55.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.55.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.55.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.56.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.56.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.56.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.57.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.57.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.57.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.58.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.58.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.58.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.59.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.59.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.59.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.60.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.60.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.60.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.61.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.61.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.61.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.62.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.62.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.62.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.63.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.63.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.63.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.9.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.9.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.experts.9.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.shared_experts.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.shared_experts.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.shared_experts.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.10.attention.dense.weight": "model-00002-of-00004.safetensors", - "model.layers.10.attention.query_key_value.weight": "model-00002-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.0.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.0.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.1.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.1.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.2.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.2.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.3.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.3.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.4.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.4.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.49.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.49.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.5.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.5.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.50.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.50.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.50.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.51.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.51.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.52.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.52.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.53.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.53.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.54.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.54.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.55.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.55.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.56.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.56.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.57.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.57.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.58.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.58.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.59.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.59.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.59.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.6.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.6.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.60.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.60.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.61.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.61.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.62.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.62.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.63.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.63.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.7.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.7.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.8.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.8.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.attention.dense.weight": "model-00002-of-00004.safetensors", - "model.layers.11.attention.query_key_value.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.0.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.0.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.1.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.1.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.2.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.2.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.3.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.3.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.4.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.4.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.49.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.49.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.49.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.5.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.5.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.50.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.50.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.51.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.51.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.52.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.52.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.52.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.53.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.53.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.54.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.54.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.55.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.55.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.56.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.56.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.57.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.57.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.58.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.58.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.59.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.59.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.6.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.6.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.60.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.60.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.61.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.61.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.62.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.62.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.63.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.63.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.7.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.7.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.8.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.8.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.attention.dense.weight": "model-00002-of-00004.safetensors", - "model.layers.12.attention.query_key_value.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.0.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.0.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.1.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.1.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.2.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.2.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.3.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.3.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.4.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.4.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.49.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.49.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.49.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.5.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.5.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.50.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.50.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.50.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.51.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.51.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.51.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.52.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.52.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.52.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.53.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.53.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.53.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.54.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.54.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.54.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.55.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.55.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.55.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.56.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.56.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.56.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.57.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.57.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.57.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.58.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.58.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.58.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.59.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.59.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.59.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.6.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.6.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.60.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.60.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.60.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.61.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.61.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.61.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.62.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.62.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.62.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.63.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.63.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.63.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.7.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.7.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.8.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.8.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.attention.dense.weight": "model-00002-of-00004.safetensors", - "model.layers.13.attention.query_key_value.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.0.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.0.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.1.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.1.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.2.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.2.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.3.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.3.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.4.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.4.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.49.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.49.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.49.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.5.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.5.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.50.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.50.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.50.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.51.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.51.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.51.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.52.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.52.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.52.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.53.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.53.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.53.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.54.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.54.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.54.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.55.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.55.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.55.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.56.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.56.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.56.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.57.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.57.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.57.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.58.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.58.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.58.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.59.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.59.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.59.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.6.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.6.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.60.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.60.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.60.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.61.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.61.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.61.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.62.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.62.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.62.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.63.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.63.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.63.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.7.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.7.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.7.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.8.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.8.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.attention.dense.weight": "model-00002-of-00004.safetensors", - "model.layers.14.attention.query_key_value.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.0.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.0.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.1.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.1.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.2.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.2.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.3.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.3.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.4.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.4.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.49.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.49.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.49.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.5.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.5.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.50.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.50.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.50.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.51.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.51.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.51.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.52.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.52.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.52.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.53.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.53.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.53.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.54.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.54.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.54.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.55.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.55.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.55.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.56.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.56.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.56.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.57.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.57.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.57.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.58.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.58.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.58.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.59.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.59.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.59.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.6.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.6.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.60.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.60.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.60.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.61.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.61.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.61.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.62.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.62.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.62.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.63.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.63.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.63.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.7.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.7.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.8.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.8.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.attention.dense.weight": "model-00002-of-00004.safetensors", - "model.layers.15.attention.query_key_value.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.0.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.0.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.1.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.1.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.2.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.2.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.3.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.3.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.4.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.4.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.49.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.49.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.49.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.5.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.5.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.50.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.50.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.50.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.51.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.51.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.51.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.52.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.52.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.52.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.53.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.53.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.53.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.54.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.54.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.54.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.55.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.55.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.55.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.56.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.56.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.56.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.57.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.57.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.57.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.58.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.58.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.58.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.59.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.59.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.59.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.6.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.6.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.60.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.60.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.60.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.61.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.61.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.61.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.62.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.62.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.62.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.63.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.63.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.63.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.7.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.7.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.7.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.8.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.8.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.attention.dense.weight": "model-00002-of-00004.safetensors", - "model.layers.16.attention.query_key_value.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.0.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.0.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.1.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.1.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.1.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.2.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.2.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.3.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.3.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.4.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.4.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.5.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.5.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.6.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.6.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.experts.7.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.7.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.8.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.8.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.17.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.17.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.21.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.22.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.22.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.22.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.23.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.23.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.23.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.24.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.24.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.24.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.25.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.25.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.25.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.26.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.26.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.26.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.27.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.27.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.27.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.28.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.28.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.28.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.29.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.29.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.29.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.30.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.30.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.30.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.31.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.31.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.31.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.32.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.32.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.32.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.33.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.33.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.33.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.34.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.34.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.34.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.35.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.35.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.35.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.36.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.36.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.36.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.37.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.37.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.37.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.38.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.38.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.38.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.39.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.39.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.39.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.40.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.40.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.40.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.41.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.41.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.43.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.43.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.43.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.44.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.44.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.44.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.45.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.45.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.45.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.46.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.46.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.46.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.47.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.47.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.47.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.48.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.48.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.48.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.18.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.18.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.21.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.22.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.22.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.22.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.23.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.23.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.23.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.24.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.24.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.24.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.25.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.25.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.25.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.26.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.26.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.26.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.27.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.27.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.27.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.28.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.28.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.28.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.29.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.29.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.29.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.30.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.30.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.30.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.31.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.31.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.31.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.32.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.32.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.32.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.33.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.33.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.33.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.34.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.34.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.34.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.35.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.35.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.35.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.36.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.36.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.36.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.37.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.37.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.37.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.38.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.38.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.38.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.39.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.39.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.39.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.40.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.40.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.40.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.41.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.41.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.43.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.43.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.43.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.44.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.44.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.44.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.45.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.45.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.45.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.46.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.46.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.46.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.47.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.47.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.47.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.48.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.48.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.48.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.19.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.19.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.21.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.22.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.22.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.22.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.23.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.23.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.23.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.24.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.24.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.24.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.25.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.25.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.25.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.26.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.26.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.26.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.27.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.27.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.27.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.28.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.28.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.28.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.29.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.29.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.29.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.30.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.30.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.30.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.31.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.31.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.31.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.32.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.32.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.32.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.33.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.33.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.33.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.34.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.34.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.34.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.35.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.35.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.35.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.36.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.36.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.36.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.37.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.37.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.37.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.38.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.38.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.38.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.39.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.39.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.39.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.40.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.40.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.40.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.41.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.41.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.43.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.43.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.43.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.44.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.44.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.44.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.45.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.45.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.45.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.46.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.46.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.46.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.47.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.47.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.47.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.48.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.48.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.48.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.2.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.2.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.10.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.10.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.10.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.11.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.11.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.11.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.12.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.12.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.12.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.13.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.13.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.13.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.14.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.14.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.14.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.15.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.15.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.15.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.16.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.16.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.16.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.17.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.17.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.17.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.18.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.18.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.18.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.19.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.19.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.19.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.20.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.20.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.20.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.21.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.21.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.21.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.22.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.22.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.22.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.23.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.23.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.23.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.24.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.24.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.24.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.25.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.25.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.25.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.26.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.26.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.26.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.27.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.27.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.27.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.28.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.28.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.28.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.29.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.29.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.29.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.30.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.30.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.30.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.31.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.31.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.31.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.32.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.32.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.32.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.33.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.33.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.33.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.34.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.34.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.34.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.35.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.35.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.35.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.36.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.36.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.36.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.37.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.37.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.38.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.38.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.38.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.39.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.39.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.39.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.40.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.40.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.40.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.41.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.41.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.41.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.42.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.42.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.42.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.43.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.43.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.43.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.44.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.44.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.44.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.45.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.45.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.45.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.46.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.46.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.46.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.47.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.47.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.47.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.48.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.48.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.48.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.49.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.49.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.49.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.50.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.50.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.50.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.51.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.51.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.51.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.52.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.52.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.52.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.53.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.53.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.53.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.54.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.54.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.54.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.55.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.55.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.55.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.56.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.56.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.56.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.57.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.57.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.57.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.58.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.58.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.58.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.59.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.59.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.59.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.60.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.60.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.60.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.61.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.61.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.61.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.62.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.62.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.62.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.63.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.63.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.63.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.9.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.9.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.experts.9.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.shared_experts.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.shared_experts.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.shared_experts.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.20.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.20.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.21.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.22.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.22.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.22.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.23.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.23.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.23.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.24.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.24.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.24.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.25.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.25.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.25.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.26.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.26.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.26.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.27.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.27.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.27.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.28.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.28.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.28.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.29.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.29.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.29.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.30.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.30.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.30.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.31.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.31.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.31.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.32.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.32.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.32.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.33.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.33.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.33.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.34.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.34.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.34.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.35.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.35.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.35.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.36.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.36.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.36.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.37.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.37.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.37.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.38.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.38.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.38.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.39.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.39.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.39.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.40.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.40.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.40.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.41.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.41.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.43.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.43.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.43.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.44.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.44.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.44.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.45.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.45.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.45.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.46.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.46.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.46.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.47.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.47.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.47.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.48.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.48.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.48.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.21.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.21.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.22.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.22.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.22.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.23.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.23.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.23.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.24.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.24.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.24.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.25.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.25.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.25.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.26.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.26.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.26.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.27.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.27.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.27.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.28.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.28.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.28.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.29.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.29.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.29.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.30.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.30.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.30.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.31.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.31.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.31.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.32.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.32.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.32.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.33.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.33.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.33.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.34.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.34.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.34.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.35.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.35.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.35.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.36.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.36.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.36.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.37.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.37.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.37.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.38.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.38.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.38.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.39.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.39.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.39.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.40.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.40.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.40.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.41.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.41.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.43.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.43.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.43.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.44.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.44.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.44.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.45.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.45.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.45.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.46.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.46.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.46.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.47.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.47.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.47.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.48.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.48.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.48.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.22.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.21.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.22.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.22.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.22.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.23.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.23.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.23.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.24.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.24.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.24.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.25.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.25.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.25.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.26.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.26.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.26.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.27.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.27.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.27.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.28.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.28.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.28.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.29.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.29.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.29.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.30.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.30.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.30.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.31.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.31.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.31.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.32.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.32.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.32.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.33.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.33.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.33.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.34.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.34.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.34.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.35.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.35.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.35.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.36.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.36.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.36.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.37.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.37.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.37.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.38.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.38.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.38.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.39.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.39.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.39.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.40.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.40.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.40.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.41.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.41.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.43.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.43.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.43.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.44.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.44.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.44.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.45.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.45.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.45.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.46.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.46.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.46.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.47.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.47.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.47.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.48.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.48.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.48.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.23.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.21.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.22.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.22.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.22.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.23.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.23.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.23.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.24.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.24.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.24.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.25.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.25.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.25.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.26.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.26.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.26.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.27.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.27.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.27.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.28.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.28.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.28.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.29.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.29.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.29.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.30.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.30.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.30.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.31.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.31.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.31.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.32.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.32.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.32.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.33.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.33.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.33.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.34.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.34.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.34.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.35.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.35.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.35.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.36.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.36.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.36.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.37.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.37.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.37.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.38.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.38.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.38.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.39.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.39.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.39.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.40.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.40.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.40.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.41.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.41.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.43.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.43.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.43.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.44.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.44.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.44.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.45.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.45.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.45.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.46.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.46.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.46.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.47.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.47.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.47.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.48.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.48.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.48.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.24.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.21.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.22.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.22.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.22.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.23.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.23.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.23.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.24.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.24.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.24.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.25.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.25.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.25.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.26.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.26.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.26.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.27.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.27.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.27.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.28.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.28.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.28.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.29.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.29.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.29.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.30.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.30.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.30.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.31.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.31.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.31.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.32.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.32.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.32.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.33.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.33.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.33.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.34.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.34.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.34.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.35.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.35.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.35.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.36.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.36.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.36.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.37.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.37.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.37.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.38.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.38.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.38.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.39.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.39.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.39.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.40.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.40.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.40.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.41.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.41.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.43.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.43.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.43.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.44.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.44.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.44.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.45.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.45.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.45.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.46.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.46.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.46.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.47.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.47.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.47.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.48.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.48.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.48.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.49.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.49.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.49.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.50.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.50.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.50.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.51.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.51.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.51.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.52.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.52.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.52.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.53.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.53.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.53.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.54.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.54.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.54.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.55.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.55.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.55.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.56.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.56.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.56.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.57.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.57.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.57.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.58.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.58.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.58.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.59.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.59.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.59.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.60.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.60.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.61.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.61.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.61.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.62.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.62.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.62.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.63.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.63.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.63.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.attention.dense.weight": "model-00003-of-00004.safetensors", - "model.layers.25.attention.query_key_value.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.0.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.0.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.1.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.1.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.10.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.10.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.11.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.11.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.12.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.12.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.13.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.13.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.14.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.14.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.15.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.15.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.16.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.16.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.16.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.17.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.17.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.17.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.18.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.18.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.18.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.19.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.19.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.19.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.2.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.2.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.20.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.20.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.20.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.21.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.21.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.21.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.22.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.22.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.22.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.23.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.23.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.23.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.24.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.24.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.24.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.25.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.25.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.25.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.26.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.26.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.26.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.27.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.27.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.27.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.28.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.28.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.28.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.29.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.29.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.29.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.3.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.3.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.30.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.30.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.30.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.31.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.31.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.31.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.32.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.32.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.32.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.33.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.33.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.33.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.34.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.34.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.34.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.35.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.35.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.35.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.36.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.36.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.36.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.37.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.37.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.37.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.38.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.38.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.38.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.39.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.39.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.39.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.4.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.4.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.40.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.40.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.40.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.41.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.41.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.41.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.42.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.42.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.43.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.43.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.43.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.44.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.44.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.44.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.45.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.45.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.45.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.46.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.46.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.46.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.47.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.47.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.47.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.48.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.48.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.48.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.49.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.49.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.49.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.5.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.5.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.50.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.50.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.50.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.51.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.51.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.51.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.52.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.52.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.52.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.53.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.53.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.53.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.54.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.54.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.54.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.55.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.55.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.55.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.56.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.56.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.56.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.57.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.57.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.57.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.58.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.58.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.58.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.59.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.59.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.59.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.60.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.60.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.60.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.61.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.61.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.61.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.62.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.62.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.62.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.63.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.63.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.63.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.experts.7.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.7.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.7.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.8.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.8.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.9.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.experts.9.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.26.attention.dense.weight": "model-00004-of-00004.safetensors", - "model.layers.26.attention.query_key_value.weight": "model-00004-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.0.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.0.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.1.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.1.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.10.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.10.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.10.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.11.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.11.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.12.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.12.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.13.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.13.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.14.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.14.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.15.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.15.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.16.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.16.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.16.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.17.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.17.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.17.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.18.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.18.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.18.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.19.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.19.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.19.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.2.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.2.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.20.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.20.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.20.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.21.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.21.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.21.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.22.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.22.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.22.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.23.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.23.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.23.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.24.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.24.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.24.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.25.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.25.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.25.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.26.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.26.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.26.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.27.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.27.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.27.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.28.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.28.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.28.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.29.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.29.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.29.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.3.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.3.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.30.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.30.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.30.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.31.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.31.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.31.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.32.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.32.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.32.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.33.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.33.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.33.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.34.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.34.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.34.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.35.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.35.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.35.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.36.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.36.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.36.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.37.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.37.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.37.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.38.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.38.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.38.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.39.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.39.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.39.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.4.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.4.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.40.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.40.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.40.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.41.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.41.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.41.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.42.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.42.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.42.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.43.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.43.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.43.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.44.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.44.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.44.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.45.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.45.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.45.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.46.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.46.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.46.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.47.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.47.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.47.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.48.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.48.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.48.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.49.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.49.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.49.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.5.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.5.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.50.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.50.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.50.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.51.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.51.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.51.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.52.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.52.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.52.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.53.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.53.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.53.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.54.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.54.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.54.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.55.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.55.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.55.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.56.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.56.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.56.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.57.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.57.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.57.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.58.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.58.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.58.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.59.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.59.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.59.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.6.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.6.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.60.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.60.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.60.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.61.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.61.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.61.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.62.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.62.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.62.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.63.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.63.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.63.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.7.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.7.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.8.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.8.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.9.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.experts.9.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.gate.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.27.attention.dense.weight": "model-00004-of-00004.safetensors", - "model.layers.27.attention.query_key_value.weight": "model-00004-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.0.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.0.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.1.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.1.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.10.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.10.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.11.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.11.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.12.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.12.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.13.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.13.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.14.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.14.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.14.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.15.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.15.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.16.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.16.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.16.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.17.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.17.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.17.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.18.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.18.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.18.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.19.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.19.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.19.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.2.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.2.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.20.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.20.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.20.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.21.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.21.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.21.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.22.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.22.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.22.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.23.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.23.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.23.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.24.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.24.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.24.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.25.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.25.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.25.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.26.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.26.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.26.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.27.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.27.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.27.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.28.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.28.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.28.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.29.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.29.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.29.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.3.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.3.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.30.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.30.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.30.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.31.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.31.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.31.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.32.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.32.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.32.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.33.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.33.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.33.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.34.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.34.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.34.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.35.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.35.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.35.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.36.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.36.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.36.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.37.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.37.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.37.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.38.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.38.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.38.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.39.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.39.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.39.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.4.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.4.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.40.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.40.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.40.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.41.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.41.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.41.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.42.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.42.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.42.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.43.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.43.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.43.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.44.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.44.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.44.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.45.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.45.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.45.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.46.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.46.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.46.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.47.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.47.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.47.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.48.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.48.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.48.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.49.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.49.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.49.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.5.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.5.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.50.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.50.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.50.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.51.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.51.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.51.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.52.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.52.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.52.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.53.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.53.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.53.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.54.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.54.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.54.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.55.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.55.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.55.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.56.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.56.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.56.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.57.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.57.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.57.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.58.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.58.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.58.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.59.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.59.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.59.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.6.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.6.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.60.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.60.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.60.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.61.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.61.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.61.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.62.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.62.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.62.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.63.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.63.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.63.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.7.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.7.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.7.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.8.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.8.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.9.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.experts.9.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.gate.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.3.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.3.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.10.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.10.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.10.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.11.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.11.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.11.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.12.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.12.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.12.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.13.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.13.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.13.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.14.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.14.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.14.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.15.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.15.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.15.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.16.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.16.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.16.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.17.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.17.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.17.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.18.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.18.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.18.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.19.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.19.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.19.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.20.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.20.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.20.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.21.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.21.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.21.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.22.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.22.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.22.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.23.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.23.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.23.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.24.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.24.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.24.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.25.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.25.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.25.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.26.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.26.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.26.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.27.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.27.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.27.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.28.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.28.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.28.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.29.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.29.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.29.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.30.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.30.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.30.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.31.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.31.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.31.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.32.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.32.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.32.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.33.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.33.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.33.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.34.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.34.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.34.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.35.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.35.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.35.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.36.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.36.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.36.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.37.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.37.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.38.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.38.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.38.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.39.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.39.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.39.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.40.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.40.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.40.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.41.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.41.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.41.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.42.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.42.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.42.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.43.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.43.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.43.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.44.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.44.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.44.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.45.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.45.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.45.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.46.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.46.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.46.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.47.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.47.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.47.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.48.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.48.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.48.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.49.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.49.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.49.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.50.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.50.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.50.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.51.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.51.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.51.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.52.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.52.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.52.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.53.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.53.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.53.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.54.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.54.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.54.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.55.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.55.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.55.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.56.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.56.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.56.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.57.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.57.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.57.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.58.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.58.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.58.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.59.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.59.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.59.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.60.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.60.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.60.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.61.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.61.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.61.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.62.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.62.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.62.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.63.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.63.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.9.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.9.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.experts.9.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.4.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.10.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.10.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.11.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.11.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.12.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.12.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.13.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.13.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.14.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.14.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.15.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.15.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.16.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.16.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.17.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.17.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.18.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.18.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.19.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.19.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.19.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.20.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.20.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.21.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.21.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.22.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.22.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.23.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.23.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.24.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.24.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.25.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.25.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.26.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.26.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.27.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.27.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.27.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.28.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.28.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.29.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.29.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.30.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.30.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.31.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.31.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.32.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.32.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.33.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.33.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.34.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.34.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.34.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.35.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.35.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.36.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.36.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.37.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.37.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.38.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.38.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.39.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.39.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.40.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.40.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.41.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.41.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.42.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.42.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.42.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.43.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.43.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.44.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.44.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.45.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.45.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.46.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.46.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.47.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.47.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.48.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.48.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.49.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.49.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.49.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.50.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.50.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.51.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.51.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.52.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.52.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.53.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.53.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.54.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.54.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.55.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.55.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.56.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.56.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.57.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.57.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.57.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.58.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.58.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.59.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.59.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.60.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.60.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.61.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.61.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.62.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.62.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.63.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.63.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.9.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.experts.9.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.5.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.10.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.10.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.11.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.11.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.12.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.12.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.13.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.13.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.14.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.14.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.15.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.15.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.16.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.16.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.17.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.17.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.18.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.18.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.19.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.19.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.20.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.20.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.21.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.21.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.22.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.22.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.23.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.23.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.24.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.24.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.25.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.25.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.25.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.26.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.26.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.27.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.27.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.28.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.28.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.29.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.29.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.30.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.30.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.31.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.31.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.32.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.32.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.33.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.33.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.34.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.34.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.35.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.35.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.36.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.36.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.37.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.37.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.38.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.38.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.39.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.39.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.40.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.40.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.40.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.41.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.41.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.42.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.42.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.43.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.43.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.44.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.44.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.45.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.45.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.46.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.46.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.47.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.47.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.48.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.48.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.49.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.49.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.50.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.50.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.51.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.51.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.52.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.52.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.53.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.53.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.54.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.54.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.55.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.55.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.55.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.56.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.56.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.57.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.57.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.58.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.58.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.59.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.59.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.60.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.60.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.61.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.61.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.62.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.62.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.63.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.63.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.9.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.experts.9.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.6.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.10.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.10.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.11.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.11.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.12.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.12.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.13.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.13.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.14.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.14.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.15.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.15.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.16.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.16.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.17.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.17.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.18.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.18.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.19.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.19.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.20.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.20.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.21.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.21.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.22.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.22.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.23.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.23.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.23.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.24.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.24.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.25.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.25.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.26.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.26.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.27.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.27.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.27.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.28.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.28.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.29.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.29.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.30.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.30.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.31.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.31.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.32.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.32.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.33.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.33.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.34.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.34.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.35.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.35.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.36.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.36.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.37.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.37.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.38.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.38.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.38.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.39.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.39.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.40.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.40.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.41.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.41.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.42.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.42.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.42.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.43.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.43.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.44.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.44.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.45.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.45.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.46.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.46.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.47.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.47.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.48.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.48.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.49.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.49.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.50.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.50.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.51.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.51.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.52.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.52.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.53.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.53.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.53.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.54.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.54.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.55.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.55.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.56.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.56.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.57.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.57.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.57.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.58.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.58.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.59.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.59.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.60.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.60.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.61.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.61.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.62.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.62.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.63.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.63.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.9.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.experts.9.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.7.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.10.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.10.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.11.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.11.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.12.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.12.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.13.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.13.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.14.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.14.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.15.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.15.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.16.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.16.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.17.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.17.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.18.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.18.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.19.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.19.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.20.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.20.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.20.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.21.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.21.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.21.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.22.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.22.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.23.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.23.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.24.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.24.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.25.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.25.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.26.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.26.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.27.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.27.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.28.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.28.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.29.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.29.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.30.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.30.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.31.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.31.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.32.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.32.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.33.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.33.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.34.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.34.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.35.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.35.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.35.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.36.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.36.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.36.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.37.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.37.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.38.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.38.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.39.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.39.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.40.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.40.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.41.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.41.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.42.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.42.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.43.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.43.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.44.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.44.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.45.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.45.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.46.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.46.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.47.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.47.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.48.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.48.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.49.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.49.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.50.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.50.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.50.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.51.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.51.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.51.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.52.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.52.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.53.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.53.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.54.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.54.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.55.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.55.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.56.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.56.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.57.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.57.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.58.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.58.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.59.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.59.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.60.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.60.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.61.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.61.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.62.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.62.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.63.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.63.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.9.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.experts.9.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.attention.dense.weight": "model-00001-of-00004.safetensors", - "model.layers.8.attention.query_key_value.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.0.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.0.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.1.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.1.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.2.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.2.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.3.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.3.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.4.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.4.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.49.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.49.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.49.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.5.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.5.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.50.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.50.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.51.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.51.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.52.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.52.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.53.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.53.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.54.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.54.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.55.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.55.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.56.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.56.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.57.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.57.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.58.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.58.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.59.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.59.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.6.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.6.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.60.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.60.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.61.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.61.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.62.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.62.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.62.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.63.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.63.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.7.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.7.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.8.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.8.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.gate.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.attention.dense.weight": "model-00002-of-00004.safetensors", - "model.layers.9.attention.query_key_value.weight": "model-00002-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.0.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.0.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.1.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.1.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.10.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.10.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.11.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.11.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.12.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.12.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.13.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.13.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.14.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.14.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.15.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.15.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.16.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.16.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.17.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.17.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.17.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.18.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.18.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.19.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.19.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.2.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.2.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.20.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.20.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.21.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.21.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.22.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.22.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.23.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.23.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.24.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.24.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.25.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.25.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.26.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.26.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.27.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.27.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.28.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.28.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.28.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.29.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.29.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.29.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.3.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.3.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.30.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.30.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.31.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.31.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.32.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.32.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.32.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.33.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.33.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.34.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.34.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.35.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.35.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.36.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.36.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.38.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.39.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.39.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.4.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.4.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.40.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.40.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.41.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.41.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.42.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.42.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.43.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.43.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.43.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.44.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.44.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.44.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.45.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.45.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.46.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.46.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.47.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.47.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.47.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.48.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.48.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.49.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.49.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.5.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.5.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.50.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.50.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.51.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.51.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.52.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.52.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.53.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.53.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.54.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.54.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.55.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.55.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.56.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.56.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.57.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.57.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.58.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.58.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.58.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.59.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.59.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.59.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.6.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.6.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.60.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.60.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.61.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.61.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.62.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.62.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.62.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.63.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.63.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.7.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.7.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.8.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.8.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.9.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.experts.9.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00004-of-00004.safetensors", - "model.word_embeddings.weight": "model-00001-of-00004.safetensors" + "model.word_embeddings.weight": "model-00001-of-00066.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00066.safetensors", + "model.layers.0.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.1.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.10.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.11.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.12.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.13.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.14.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.15.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.16.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.17.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.18.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.19.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.2.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.20.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.21.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.22.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.23.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.24.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.25.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.26.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.27.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.3.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.4.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.5.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.6.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.7.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.8.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.9.attention.query_key_value.weight": "model-00003-of-00066.safetensors", + "model.layers.0.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.1.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.10.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.11.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.12.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.13.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.14.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.15.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.16.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.17.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.18.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.19.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.2.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.20.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.21.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.22.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.23.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.24.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.25.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.26.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.27.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.3.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.4.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.5.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.6.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.7.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.8.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.9.attention.dense.weight": "model-00004-of-00066.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", + "model.layers.0.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.0.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.1.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.1.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.2.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.2.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00006-of-00066.safetensors", + "model.layers.0.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.1.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.2.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00007-of-00066.safetensors", + "model.layers.0.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.1.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.10.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.11.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.12.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.13.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.14.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.15.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.16.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.17.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.18.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.19.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.2.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.20.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.21.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.22.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.23.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.24.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.25.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.26.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.27.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.3.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.4.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.5.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.6.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.7.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.8.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.9.mlp.gate.weight": "model-00008-of-00066.safetensors", + "model.layers.0.mlp.experts.0.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.0.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.1.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.1.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.10.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.10.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.11.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.11.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.12.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.12.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.13.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.13.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.14.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.14.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.15.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.15.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.16.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.16.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.17.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.17.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.18.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.18.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.19.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.19.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.2.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.2.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.20.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.20.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.21.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.21.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.22.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.22.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.23.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.23.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.24.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.24.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.25.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.25.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.26.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.26.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.27.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.27.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.28.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.28.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.29.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.29.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.3.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.3.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.30.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.30.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.31.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.31.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.32.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.32.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.33.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.33.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.34.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.34.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.35.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.35.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.36.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.36.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.37.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.37.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.38.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.38.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.39.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.39.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.4.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.4.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.40.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.40.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.41.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.41.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.42.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.42.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.43.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.43.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.44.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.44.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.45.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.45.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.46.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.46.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.47.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.47.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.48.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.48.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.49.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.49.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.5.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.5.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.50.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.50.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.51.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.51.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.52.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.52.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.53.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.53.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.54.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.54.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.55.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.55.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.56.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.56.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.57.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.57.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.58.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.58.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.59.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.59.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.6.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.6.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.60.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.60.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.61.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.61.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.62.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.62.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.63.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.63.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.7.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.7.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.8.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.8.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.9.gate_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.0.mlp.experts.9.up_proj.weight": "model-00009-of-00066.safetensors", + "model.layers.1.mlp.experts.0.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.0.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.1.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.1.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.10.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.10.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.11.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.11.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.12.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.12.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.13.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.13.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.14.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.14.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.15.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.15.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.16.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.16.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.17.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.17.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.18.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.18.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.19.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.19.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.2.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.2.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.20.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.20.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.21.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.21.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.22.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.22.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.23.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.23.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.24.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.24.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.25.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.25.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.26.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.26.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.27.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.27.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.28.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.28.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.29.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.29.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.3.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.3.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.30.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.30.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.31.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.31.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.32.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.32.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.33.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.33.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.34.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.34.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.35.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.35.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.36.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.36.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.37.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.37.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.38.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.38.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.39.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.39.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.4.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.4.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.40.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.40.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.41.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.41.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.42.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.42.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.43.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.43.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.44.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.44.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.45.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.45.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.46.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.46.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.47.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.47.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.48.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.48.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.49.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.49.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.5.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.5.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.50.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.50.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.51.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.51.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.52.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.52.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.53.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.53.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.54.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.54.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.55.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.55.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.56.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.56.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.57.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.57.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.58.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.58.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.59.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.59.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.6.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.6.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.60.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.60.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.61.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.61.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.62.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.62.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.63.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.63.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.7.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.7.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.8.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.8.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.9.gate_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.1.mlp.experts.9.up_proj.weight": "model-00010-of-00066.safetensors", + "model.layers.2.mlp.experts.0.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.0.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.1.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.1.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.10.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.10.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.11.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.11.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.12.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.12.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.13.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.13.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.14.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.14.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.15.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.15.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.16.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.16.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.17.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.17.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.18.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.18.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.19.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.19.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.2.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.2.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.20.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.20.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.21.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.21.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.22.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.22.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.23.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.23.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.24.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.24.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.25.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.25.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.26.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.26.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.27.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.27.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.28.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.28.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.29.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.29.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.3.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.3.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.30.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.30.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.31.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.31.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.32.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.32.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.33.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.33.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.34.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.34.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.35.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.35.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.36.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.36.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.37.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.37.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.38.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.38.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.39.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.39.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.4.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.4.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.40.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.40.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.41.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.41.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.42.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.42.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.43.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.43.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.44.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.44.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.45.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.45.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.46.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.46.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.47.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.47.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.48.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.48.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.49.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.49.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.5.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.5.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.50.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.50.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.51.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.51.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.52.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.52.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.53.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.53.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.54.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.54.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.55.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.55.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.56.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.56.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.57.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.57.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.58.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.58.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.59.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.59.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.6.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.6.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.60.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.60.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.61.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.61.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.62.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.62.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.63.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.63.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.7.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.7.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.8.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.8.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.9.gate_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.2.mlp.experts.9.up_proj.weight": "model-00011-of-00066.safetensors", + "model.layers.3.mlp.experts.0.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.0.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.1.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.1.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.10.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.10.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.11.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.11.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.12.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.12.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.13.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.13.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.14.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.14.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.15.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.15.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.16.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.16.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.17.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.17.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.18.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.18.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.19.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.19.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.2.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.2.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.20.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.20.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.21.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.21.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.22.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.22.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.23.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.23.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.24.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.24.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.25.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.25.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.26.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.26.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.27.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.27.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.28.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.28.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.29.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.29.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.3.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.3.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.30.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.30.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.31.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.31.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.32.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.32.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.33.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.33.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.34.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.34.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.35.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.35.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.36.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.36.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.37.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.37.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.38.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.38.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.39.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.39.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.4.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.4.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.40.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.40.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.41.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.41.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.42.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.42.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.43.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.43.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.44.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.44.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.45.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.45.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.46.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.46.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.47.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.47.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.48.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.48.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.49.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.49.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.5.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.5.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.50.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.50.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.51.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.51.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.52.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.52.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.53.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.53.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.54.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.54.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.55.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.55.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.56.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.56.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.57.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.57.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.58.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.58.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.59.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.59.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.6.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.6.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.60.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.60.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.61.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.61.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.62.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.62.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.63.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.7.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.7.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.8.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.8.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.9.gate_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.3.mlp.experts.9.up_proj.weight": "model-00012-of-00066.safetensors", + "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.0.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.1.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.10.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.11.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.12.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.13.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.14.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.15.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.16.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.17.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.18.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.19.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.19.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.2.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.20.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.21.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.22.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.23.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.24.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.25.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.26.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.27.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.27.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.28.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.29.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.3.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.30.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.31.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.32.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.33.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.34.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.34.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.35.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.36.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.37.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.38.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.39.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.4.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.40.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.41.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.42.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.42.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.43.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.44.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.45.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.46.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.47.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.48.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.49.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.49.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.5.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.50.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.51.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.52.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.53.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.54.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.55.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.56.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.57.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.57.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.58.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.59.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.6.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.60.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.61.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.62.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.63.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.7.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.8.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.4.mlp.experts.9.up_proj.weight": "model-00013-of-00066.safetensors", + "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.0.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.1.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.10.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.11.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.12.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.13.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.14.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.15.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.16.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.17.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.18.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.19.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.2.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.20.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.21.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.22.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.23.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.24.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.25.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.25.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.26.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.27.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.28.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.29.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.3.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.30.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.31.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.32.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.33.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.34.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.35.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.36.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.37.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.38.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.39.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.4.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.40.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.40.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.41.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.42.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.43.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.44.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.45.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.46.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.47.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.48.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.49.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.5.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.50.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.51.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.52.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.53.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.54.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.55.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.55.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.56.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.57.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.58.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.59.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.6.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.60.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.61.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.62.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.63.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.7.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.8.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.5.mlp.experts.9.up_proj.weight": "model-00014-of-00066.safetensors", + "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.0.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.1.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.10.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.11.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.12.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.13.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.14.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.15.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.16.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.17.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.18.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.19.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.2.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.20.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.21.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.22.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.23.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.23.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.24.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.25.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.26.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.27.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.27.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.28.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.29.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.3.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.30.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.31.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.32.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.33.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.34.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.35.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.36.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.37.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.38.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.38.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.39.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.4.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.40.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.41.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.42.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.42.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.43.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.44.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.45.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.46.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.47.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.48.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.49.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.5.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.50.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.51.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.52.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.53.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.53.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.54.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.55.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.56.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.57.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.57.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.58.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.59.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.6.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.60.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.61.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.62.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.63.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.7.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.8.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.6.mlp.experts.9.up_proj.weight": "model-00015-of-00066.safetensors", + "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.0.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.1.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.10.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.11.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.12.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.13.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.14.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.15.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.16.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.17.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.18.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.19.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.2.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.20.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.20.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.21.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.21.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.22.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.23.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.24.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.25.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.26.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.27.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.28.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.29.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.3.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.30.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.31.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.32.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.33.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.34.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.35.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.35.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.36.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.36.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.37.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.38.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.39.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.4.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.40.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.41.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.42.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.43.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.44.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.45.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.46.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.47.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.48.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.49.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.5.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.5.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.50.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.50.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.51.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.51.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.52.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.53.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.54.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.55.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.56.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.57.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.58.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.59.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.6.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.60.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.61.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.62.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.63.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.7.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.8.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.7.mlp.experts.9.up_proj.weight": "model-00016-of-00066.safetensors", + "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.0.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.1.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.10.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.11.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.12.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.13.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.14.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.15.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.16.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.17.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.17.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.18.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.19.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.19.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.2.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.20.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.21.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.22.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.23.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.24.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.25.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.26.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.27.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.28.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.29.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.3.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.30.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.31.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.32.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.32.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.33.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.34.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.34.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.35.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.36.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.37.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.38.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.39.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.4.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.40.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.41.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.42.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.43.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.44.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.45.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.46.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.47.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.47.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.48.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.49.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.49.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.5.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.50.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.51.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.52.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.53.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.54.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.55.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.56.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.57.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.58.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.59.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.6.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.60.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.61.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.62.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.62.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.63.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.7.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.8.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.8.mlp.experts.9.up_proj.weight": "model-00017-of-00066.safetensors", + "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.0.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.1.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.10.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.11.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.12.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.13.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.14.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.15.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.16.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.17.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.17.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.18.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.19.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.2.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.20.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.21.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.22.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.23.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.24.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.25.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.26.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.27.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.28.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.28.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.29.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.29.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.3.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.30.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.31.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.32.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.32.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.33.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.34.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.35.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.36.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.37.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.38.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.39.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.4.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.40.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.41.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.42.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.43.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.43.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.44.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.44.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.45.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.46.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.47.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.47.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.48.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.49.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.5.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.50.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.51.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.52.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.53.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.54.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.55.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.56.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.57.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.58.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.58.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.59.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.59.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.6.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.60.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.61.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.62.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.62.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.63.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.7.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.8.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.9.mlp.experts.9.up_proj.weight": "model-00018-of-00066.safetensors", + "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.0.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.1.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.10.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.11.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.12.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.13.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.14.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.15.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.16.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.17.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.18.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.19.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.2.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.20.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.21.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.22.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.23.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.24.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.25.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.26.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.27.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.28.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.28.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.29.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.3.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.30.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.31.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.32.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.33.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.34.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.35.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.36.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.37.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.37.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.38.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.39.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.4.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.40.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.41.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.42.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.43.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.44.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.45.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.46.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.47.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.48.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.49.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.5.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.50.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.50.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.51.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.52.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.53.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.54.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.55.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.56.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.57.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.58.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.59.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.59.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.6.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.60.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.61.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.62.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.63.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.7.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.8.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.10.mlp.experts.9.up_proj.weight": "model-00019-of-00066.safetensors", + "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.0.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.1.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.10.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.11.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.12.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.13.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.14.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.15.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.16.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.17.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.18.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.19.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.2.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.20.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.21.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.22.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.22.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.23.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.24.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.25.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.26.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.26.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.27.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.27.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.28.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.29.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.3.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.30.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.31.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.32.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.33.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.34.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.35.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.36.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.37.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.37.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.38.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.39.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.4.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.40.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.41.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.42.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.43.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.44.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.45.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.46.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.47.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.48.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.48.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.49.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.49.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.5.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.50.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.51.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.52.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.52.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.53.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.54.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.55.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.56.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.57.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.58.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.59.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.6.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.60.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.61.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.62.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.63.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.7.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.8.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.11.mlp.experts.9.up_proj.weight": "model-00020-of-00066.safetensors", + "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.0.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.1.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.10.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.11.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.12.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.13.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.14.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.15.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.16.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.16.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.17.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.17.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.18.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.18.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.19.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.19.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.2.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.20.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.20.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.21.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.21.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.22.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.22.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.23.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.23.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.24.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.24.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.25.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.25.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.26.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.26.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.27.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.27.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.28.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.28.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.29.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.29.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.3.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.30.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.30.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.31.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.31.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.32.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.32.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.33.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.33.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.34.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.34.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.35.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.35.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.36.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.36.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.37.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.37.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.38.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.38.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.39.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.39.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.4.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.40.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.40.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.41.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.41.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.42.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.42.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.43.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.43.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.44.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.44.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.45.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.45.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.46.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.46.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.47.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.47.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.48.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.48.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.49.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.49.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.5.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.50.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.50.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.51.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.51.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.52.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.52.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.53.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.53.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.54.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.54.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.55.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.55.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.56.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.56.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.57.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.57.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.58.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.58.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.59.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.59.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.6.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.60.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.60.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.61.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.61.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.62.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.62.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.63.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.63.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.7.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.8.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.12.mlp.experts.9.up_proj.weight": "model-00021-of-00066.safetensors", + "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.0.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.1.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.10.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.11.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.12.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.13.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.14.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.15.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.16.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.16.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.17.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.17.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.18.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.18.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.19.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.19.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.2.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.20.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.20.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.21.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.21.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.22.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.22.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.23.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.23.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.24.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.24.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.25.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.25.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.26.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.26.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.27.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.27.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.28.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.28.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.29.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.29.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.3.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.30.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.30.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.31.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.31.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.32.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.32.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.33.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.33.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.34.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.34.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.35.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.35.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.36.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.36.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.37.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.37.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.38.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.38.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.39.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.39.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.4.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.40.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.40.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.41.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.41.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.42.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.42.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.43.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.43.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.44.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.44.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.45.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.45.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.46.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.46.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.47.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.47.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.48.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.48.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.49.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.49.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.5.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.50.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.50.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.51.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.51.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.52.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.52.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.53.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.53.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.54.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.54.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.55.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.55.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.56.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.56.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.57.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.57.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.58.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.58.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.59.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.59.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.6.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.60.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.60.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.61.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.61.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.62.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.62.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.63.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.63.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.7.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.7.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.8.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.13.mlp.experts.9.up_proj.weight": "model-00022-of-00066.safetensors", + "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.0.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.1.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.10.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.11.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.12.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.13.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.14.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.15.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.16.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.16.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.17.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.17.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.18.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.18.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.19.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.19.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.2.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.20.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.20.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.21.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.21.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.22.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.22.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.23.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.23.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.24.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.24.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.25.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.25.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.26.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.26.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.27.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.27.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.28.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.28.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.29.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.29.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.3.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.30.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.30.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.31.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.31.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.32.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.32.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.33.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.33.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.34.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.34.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.35.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.35.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.36.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.36.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.37.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.37.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.38.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.38.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.39.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.39.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.4.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.40.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.40.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.41.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.41.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.42.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.42.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.43.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.43.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.44.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.44.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.45.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.45.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.46.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.46.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.47.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.47.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.48.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.48.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.49.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.49.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.5.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.50.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.50.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.51.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.51.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.52.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.52.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.53.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.53.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.54.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.54.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.55.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.55.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.56.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.56.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.57.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.57.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.58.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.58.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.59.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.59.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.6.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.60.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.60.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.61.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.61.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.62.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.62.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.63.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.63.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.7.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.8.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.14.mlp.experts.9.up_proj.weight": "model-00023-of-00066.safetensors", + "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.0.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.1.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.10.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.11.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.12.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.12.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.13.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.14.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.15.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.16.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.16.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.17.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.17.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.18.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.18.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.19.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.19.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.2.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.20.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.20.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.21.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.21.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.22.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.22.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.23.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.23.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.24.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.24.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.25.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.25.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.26.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.26.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.27.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.27.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.28.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.28.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.29.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.29.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.3.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.30.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.30.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.31.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.31.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.32.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.32.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.33.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.33.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.34.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.34.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.35.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.35.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.36.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.36.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.37.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.37.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.38.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.38.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.39.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.39.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.4.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.40.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.40.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.41.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.41.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.42.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.42.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.43.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.43.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.44.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.44.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.45.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.45.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.46.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.46.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.47.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.47.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.48.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.48.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.49.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.49.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.5.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.50.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.50.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.51.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.51.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.52.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.52.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.53.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.53.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.54.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.54.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.55.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.55.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.56.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.56.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.57.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.57.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.58.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.58.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.59.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.59.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.6.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.60.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.60.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.61.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.61.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.62.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.62.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.63.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.63.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.7.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.7.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.8.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.15.mlp.experts.9.up_proj.weight": "model-00024-of-00066.safetensors", + "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.0.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.1.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.1.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.10.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.11.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.12.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.13.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.14.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.15.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.16.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.16.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.17.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.17.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.18.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.18.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.19.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.19.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.2.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.20.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.20.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.21.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.21.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.22.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.22.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.23.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.23.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.24.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.24.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.25.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.25.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.26.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.26.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.27.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.27.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.28.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.28.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.29.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.29.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.3.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.30.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.30.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.31.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.31.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.32.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.32.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.33.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.33.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.34.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.34.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.35.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.35.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.36.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.36.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.37.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.37.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.38.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.38.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.39.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.39.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.4.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.40.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.40.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.41.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.41.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.42.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.42.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.43.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.43.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.44.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.44.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.45.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.45.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.46.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.46.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.47.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.47.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.48.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.48.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.49.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.49.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.5.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.50.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.50.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.51.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.51.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.52.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.52.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.53.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.53.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.54.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.54.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.55.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.55.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.56.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.56.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.57.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.57.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.58.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.58.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.59.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.59.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.6.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.60.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.60.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.61.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.61.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.62.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.62.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.63.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.63.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.7.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.8.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.16.mlp.experts.9.up_proj.weight": "model-00025-of-00066.safetensors", + "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.0.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.1.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.10.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.11.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.12.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.13.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.14.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.15.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.16.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.16.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.17.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.17.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.18.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.18.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.19.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.19.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.2.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.20.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.20.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.21.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.21.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.22.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.22.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.23.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.23.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.24.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.24.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.25.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.25.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.26.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.26.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.27.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.27.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.28.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.28.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.29.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.29.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.3.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.30.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.30.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.31.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.31.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.32.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.32.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.33.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.33.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.34.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.34.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.35.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.35.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.36.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.36.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.37.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.37.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.38.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.38.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.39.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.39.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.4.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.40.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.40.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.41.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.41.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.42.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.42.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.43.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.43.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.44.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.44.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.45.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.45.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.46.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.46.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.47.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.47.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.48.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.48.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.49.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.49.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.5.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.5.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.50.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.50.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.51.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.51.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.52.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.52.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.53.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.53.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.54.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.54.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.55.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.55.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.56.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.56.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.57.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.57.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.58.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.58.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.59.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.59.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.6.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.60.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.60.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.61.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.61.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.62.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.62.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.63.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.63.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.7.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.7.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.8.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.17.mlp.experts.9.up_proj.weight": "model-00026-of-00066.safetensors", + "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.0.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.1.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.10.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.11.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.12.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.13.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.14.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.15.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.16.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.16.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.17.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.17.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.18.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.18.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.19.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.19.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.2.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.20.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.20.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.21.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.21.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.22.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.22.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.23.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.23.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.24.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.24.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.25.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.25.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.26.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.26.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.27.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.27.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.28.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.28.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.29.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.29.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.3.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.30.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.30.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.31.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.31.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.32.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.32.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.33.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.33.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.34.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.34.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.35.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.35.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.36.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.36.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.37.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.37.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.38.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.38.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.39.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.39.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.4.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.40.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.40.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.41.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.41.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.42.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.42.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.43.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.43.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.44.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.44.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.45.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.45.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.46.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.46.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.47.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.47.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.48.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.48.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.49.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.49.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.5.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.50.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.50.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.51.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.51.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.52.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.52.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.53.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.53.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.54.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.54.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.55.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.55.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.56.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.56.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.57.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.57.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.58.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.58.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.59.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.59.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.6.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.60.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.60.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.61.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.61.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.62.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.62.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.63.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.63.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.7.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.8.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.9.gate_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.18.mlp.experts.9.up_proj.weight": "model-00027-of-00066.safetensors", + "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.0.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.1.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.10.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.11.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.12.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.13.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.14.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.15.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.16.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.16.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.17.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.17.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.18.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.18.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.19.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.19.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.2.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.20.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.20.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.21.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.21.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.22.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.22.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.23.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.23.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.24.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.24.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.25.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.25.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.26.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.26.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.27.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.27.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.28.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.28.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.29.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.29.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.3.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.30.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.30.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.31.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.31.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.32.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.32.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.33.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.33.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.34.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.34.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.35.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.35.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.36.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.36.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.37.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.37.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.38.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.38.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.39.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.39.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.4.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.40.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.40.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.41.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.41.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.42.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.42.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.43.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.43.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.44.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.44.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.45.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.45.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.46.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.46.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.47.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.47.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.48.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.48.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.49.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.49.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.5.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.50.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.50.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.51.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.51.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.52.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.52.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.53.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.53.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.54.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.54.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.55.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.55.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.56.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.56.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.57.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.57.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.58.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.58.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.59.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.59.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.6.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.60.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.60.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.61.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.61.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.62.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.62.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.63.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.63.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.7.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.7.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.8.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.19.mlp.experts.9.up_proj.weight": "model-00028-of-00066.safetensors", + "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.0.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.1.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.10.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.11.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.12.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.13.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.14.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.15.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.16.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.16.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.17.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.17.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.18.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.18.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.19.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.19.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.2.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.20.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.20.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.21.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.21.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.22.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.22.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.23.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.23.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.24.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.24.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.25.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.25.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.26.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.26.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.27.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.27.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.28.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.28.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.29.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.29.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.3.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.30.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.30.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.31.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.31.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.32.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.32.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.33.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.33.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.34.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.34.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.35.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.35.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.36.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.36.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.37.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.37.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.38.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.38.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.39.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.39.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.4.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.40.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.40.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.41.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.41.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.42.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.42.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.43.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.43.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.44.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.44.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.45.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.45.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.46.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.46.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.47.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.47.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.48.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.48.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.49.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.49.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.5.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.50.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.50.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.51.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.51.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.52.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.52.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.53.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.53.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.54.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.54.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.55.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.55.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.56.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.56.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.57.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.57.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.58.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.58.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.59.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.59.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.6.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.60.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.60.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.61.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.61.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.62.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.62.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.63.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.63.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.7.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.8.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.9.gate_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.20.mlp.experts.9.up_proj.weight": "model-00029-of-00066.safetensors", + "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.0.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.1.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.10.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.11.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.12.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.13.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.13.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.14.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.15.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.16.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.16.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.17.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.17.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.18.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.18.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.19.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.19.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.2.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.20.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.20.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.21.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.21.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.22.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.22.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.23.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.23.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.24.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.24.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.25.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.25.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.26.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.26.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.27.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.27.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.28.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.28.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.29.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.29.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.3.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.30.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.30.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.31.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.31.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.32.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.32.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.33.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.33.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.34.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.34.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.35.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.35.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.36.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.36.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.37.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.37.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.38.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.38.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.39.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.39.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.4.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.40.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.40.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.41.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.41.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.42.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.42.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.43.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.43.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.44.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.44.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.45.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.45.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.46.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.46.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.47.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.47.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.48.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.48.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.49.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.49.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.5.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.50.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.50.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.51.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.51.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.52.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.52.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.53.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.53.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.54.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.54.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.55.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.55.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.56.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.56.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.57.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.57.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.58.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.58.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.59.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.59.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.6.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.60.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.60.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.61.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.61.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.62.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.62.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.63.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.63.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.7.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.7.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.8.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.21.mlp.experts.9.up_proj.weight": "model-00030-of-00066.safetensors", + "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.0.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.1.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.10.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.11.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.12.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.13.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.14.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.15.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.16.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.16.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.17.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.17.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.18.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.18.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.19.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.19.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.2.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.2.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.20.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.20.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.21.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.21.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.22.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.22.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.23.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.23.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.24.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.24.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.25.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.25.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.26.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.26.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.27.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.27.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.28.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.28.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.29.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.29.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.3.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.30.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.30.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.31.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.31.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.32.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.32.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.33.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.33.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.34.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.34.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.35.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.35.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.36.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.36.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.37.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.37.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.38.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.38.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.39.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.39.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.4.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.40.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.40.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.41.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.41.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.42.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.42.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.43.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.43.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.44.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.44.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.45.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.45.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.46.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.46.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.47.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.47.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.48.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.48.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.49.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.49.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.5.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.50.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.50.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.51.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.51.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.52.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.52.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.53.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.53.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.54.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.54.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.55.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.55.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.56.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.56.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.57.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.57.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.58.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.58.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.59.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.59.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.6.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.60.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.60.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.61.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.61.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.62.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.62.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.63.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.63.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.7.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.8.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.22.mlp.experts.9.up_proj.weight": "model-00031-of-00066.safetensors", + "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.0.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.1.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.10.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.11.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.12.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.13.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.14.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.15.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.16.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.16.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.17.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.17.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.18.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.18.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.19.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.19.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.2.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.20.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.20.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.21.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.21.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.22.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.22.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.23.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.23.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.24.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.24.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.25.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.25.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.26.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.26.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.27.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.27.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.28.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.28.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.29.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.29.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.3.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.30.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.30.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.31.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.31.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.32.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.32.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.33.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.33.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.34.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.34.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.35.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.35.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.36.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.36.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.37.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.37.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.38.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.38.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.39.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.39.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.4.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.40.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.40.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.41.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.41.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.42.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.42.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.43.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.43.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.44.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.44.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.45.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.45.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.46.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.46.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.47.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.47.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.48.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.48.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.49.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.49.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.5.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.50.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.50.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.51.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.51.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.52.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.52.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.53.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.53.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.54.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.54.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.55.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.55.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.56.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.56.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.57.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.57.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.58.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.58.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.59.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.59.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.6.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.6.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.60.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.60.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.61.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.61.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.62.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.62.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.63.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.63.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.7.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.8.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.23.mlp.experts.9.up_proj.weight": "model-00032-of-00066.safetensors", + "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.0.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.1.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.10.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.11.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.12.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.13.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.14.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.15.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.16.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.16.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.17.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.17.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.18.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.18.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.19.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.19.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.2.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.20.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.20.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.21.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.21.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.22.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.22.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.23.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.23.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.24.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.24.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.25.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.25.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.26.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.26.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.27.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.27.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.28.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.28.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.29.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.29.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.3.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.30.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.30.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.31.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.31.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.32.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.32.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.33.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.33.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.34.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.34.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.35.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.35.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.36.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.36.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.37.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.37.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.38.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.38.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.39.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.39.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.4.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.40.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.40.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.41.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.41.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.42.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.42.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.43.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.43.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.44.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.44.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.45.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.45.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.46.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.46.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.47.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.47.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.48.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.48.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.49.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.49.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.5.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.50.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.50.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.51.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.51.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.52.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.52.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.53.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.53.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.54.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.54.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.55.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.55.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.56.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.56.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.57.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.57.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.58.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.58.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.59.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.59.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.6.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.60.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.60.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.61.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.61.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.62.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.62.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.63.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.63.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.7.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.8.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.24.mlp.experts.9.up_proj.weight": "model-00033-of-00066.safetensors", + "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.0.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.1.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.10.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.11.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.12.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.13.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.14.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.15.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.16.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.16.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.17.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.17.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.18.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.18.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.19.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.19.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.2.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.20.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.20.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.21.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.21.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.22.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.22.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.23.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.23.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.24.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.24.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.25.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.25.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.26.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.26.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.27.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.27.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.28.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.28.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.29.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.29.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.3.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.30.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.30.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.31.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.31.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.32.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.32.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.33.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.33.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.34.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.34.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.35.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.35.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.36.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.36.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.37.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.37.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.38.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.38.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.39.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.39.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.4.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.40.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.40.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.41.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.41.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.42.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.43.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.43.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.44.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.44.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.45.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.45.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.46.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.46.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.47.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.47.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.48.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.48.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.49.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.49.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.5.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.50.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.50.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.51.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.51.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.52.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.52.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.53.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.53.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.54.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.54.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.55.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.55.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.56.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.56.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.57.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.57.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.58.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.58.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.59.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.59.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.6.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.60.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.60.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.61.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.61.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.62.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.62.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.63.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.63.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.7.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.7.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.8.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.25.mlp.experts.9.up_proj.weight": "model-00034-of-00066.safetensors", + "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.0.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.1.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.10.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.10.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.11.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.12.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.13.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.14.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.15.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.16.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.16.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.17.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.17.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.18.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.18.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.19.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.19.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.2.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.20.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.20.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.21.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.21.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.22.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.22.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.23.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.23.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.24.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.24.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.25.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.25.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.26.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.26.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.27.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.27.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.28.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.28.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.29.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.29.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.3.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.30.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.30.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.31.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.31.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.32.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.32.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.33.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.33.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.34.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.34.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.35.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.35.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.36.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.36.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.37.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.37.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.38.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.38.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.39.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.39.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.4.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.40.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.40.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.41.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.41.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.42.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.42.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.43.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.43.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.44.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.44.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.45.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.45.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.46.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.46.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.47.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.47.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.48.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.48.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.49.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.49.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.5.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.50.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.50.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.51.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.51.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.52.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.52.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.53.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.53.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.54.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.54.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.55.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.55.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.56.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.56.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.57.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.57.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.58.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.58.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.59.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.59.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.6.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.60.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.60.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.61.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.61.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.62.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.62.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.63.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.63.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.7.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.8.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.26.mlp.experts.9.up_proj.weight": "model-00035-of-00066.safetensors", + "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.0.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.1.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.10.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.11.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.12.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.13.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.14.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.14.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.15.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.16.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.16.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.17.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.17.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.18.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.18.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.19.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.19.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.2.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.20.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.20.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.21.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.21.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.22.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.22.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.23.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.23.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.24.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.24.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.25.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.25.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.26.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.26.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.27.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.27.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.28.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.28.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.29.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.29.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.3.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.30.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.30.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.31.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.31.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.32.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.32.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.33.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.33.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.34.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.34.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.35.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.35.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.36.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.36.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.37.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.37.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.38.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.38.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.39.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.39.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.4.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.40.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.40.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.41.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.41.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.42.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.42.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.43.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.43.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.44.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.44.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.45.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.45.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.46.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.46.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.47.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.47.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.48.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.48.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.49.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.49.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.5.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.50.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.50.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.51.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.51.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.52.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.52.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.53.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.53.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.54.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.54.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.55.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.55.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.56.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.56.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.57.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.57.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.58.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.58.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.59.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.59.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.6.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.60.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.60.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.61.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.61.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.62.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.62.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.63.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.63.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.7.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.7.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.8.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.27.mlp.experts.9.up_proj.weight": "model-00036-of-00066.safetensors", + "model.layers.0.mlp.experts.0.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.1.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.10.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.11.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.12.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.13.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.14.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.15.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.16.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.17.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.18.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.19.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.2.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.20.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.21.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.22.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.23.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.24.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.25.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.26.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.27.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.28.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.29.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.3.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.30.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.31.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.32.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.33.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.34.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.35.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.36.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.37.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.38.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.39.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.4.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.40.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.41.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.42.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.43.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.44.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.45.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.46.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.47.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.48.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.49.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.5.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.50.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.51.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.52.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.53.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.54.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.55.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.56.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.57.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.58.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.59.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.6.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.60.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.61.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.62.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.63.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.7.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.8.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.0.mlp.experts.9.down_proj.weight": "model-00037-of-00066.safetensors", + "model.layers.1.mlp.experts.0.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.1.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.10.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.11.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.12.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.13.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.14.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.15.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.16.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.17.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.18.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.19.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.2.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.20.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.21.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.22.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.23.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.24.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.25.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.26.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.27.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.28.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.29.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.3.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.30.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.31.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.32.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.33.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.34.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.35.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.36.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.37.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.38.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.39.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.4.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.40.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.41.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.42.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.43.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.44.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.45.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.46.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.47.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.48.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.49.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.5.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.50.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.51.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.52.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.53.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.54.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.55.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.56.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.57.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.58.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.59.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.6.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.60.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.61.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.62.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.63.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.7.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.8.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.1.mlp.experts.9.down_proj.weight": "model-00038-of-00066.safetensors", + "model.layers.2.mlp.experts.0.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.1.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.10.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.11.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.12.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.13.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.14.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.15.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.16.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.17.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.18.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.19.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.2.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.20.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.21.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.22.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.23.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.24.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.25.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.26.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.27.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.28.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.29.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.3.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.30.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.31.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.32.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.33.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.34.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.35.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.36.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.37.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.38.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.39.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.4.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.40.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.41.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.42.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.43.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.44.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.45.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.46.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.47.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.48.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.49.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.5.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.50.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.51.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.52.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.53.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.54.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.55.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.56.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.57.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.58.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.59.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.6.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.60.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.61.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.62.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.63.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.7.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.8.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.2.mlp.experts.9.down_proj.weight": "model-00039-of-00066.safetensors", + "model.layers.3.mlp.experts.0.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.1.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.10.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.11.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.12.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.13.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.14.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.15.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.16.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.17.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.18.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.19.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.2.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.20.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.21.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.22.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.23.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.24.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.25.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.26.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.27.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.28.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.29.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.3.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.30.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.31.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.32.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.33.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.34.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.35.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.36.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.37.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.38.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.39.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.4.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.40.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.41.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.42.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.43.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.44.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.45.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.46.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.47.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.48.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.49.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.5.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.50.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.51.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.52.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.53.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.54.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.55.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.56.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.57.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.58.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.59.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.6.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.60.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.61.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.62.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.63.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.7.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.8.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.3.mlp.experts.9.down_proj.weight": "model-00040-of-00066.safetensors", + "model.layers.4.mlp.experts.0.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.1.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.10.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.11.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.12.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.13.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.14.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.15.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.16.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.17.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.18.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.19.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.2.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.20.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.21.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.22.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.23.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.24.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.25.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.26.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.27.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.28.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.29.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.3.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.30.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.31.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.32.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.33.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.34.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.35.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.36.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.37.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.38.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.39.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.4.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.40.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.41.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.42.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.43.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.44.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.45.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.46.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.47.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.48.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.49.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.5.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.50.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.51.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.52.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.53.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.54.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.55.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.56.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.57.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.58.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.59.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.6.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.60.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.61.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.62.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.63.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.7.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.8.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.4.mlp.experts.9.down_proj.weight": "model-00041-of-00066.safetensors", + "model.layers.5.mlp.experts.0.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.1.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.10.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.11.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.12.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.13.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.14.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.15.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.16.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.17.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.18.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.19.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.2.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.20.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.21.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.22.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.23.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.24.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.25.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.26.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.27.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.28.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.29.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.3.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.30.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.31.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.32.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.33.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.34.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.35.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.36.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.37.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.38.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.39.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.4.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.40.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.41.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.42.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.43.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.44.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.45.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.46.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.47.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.48.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.49.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.5.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.50.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.51.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.52.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.53.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.54.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.55.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.56.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.57.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.58.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.59.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.6.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.60.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.61.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.62.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.63.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.7.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.8.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.5.mlp.experts.9.down_proj.weight": "model-00042-of-00066.safetensors", + "model.layers.6.mlp.experts.0.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.1.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.10.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.11.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.12.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.13.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.14.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.15.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.16.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.17.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.18.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.19.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.2.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.20.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.21.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.22.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.23.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.24.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.25.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.26.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.27.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.28.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.29.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.3.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.30.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.31.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.32.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.33.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.34.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.35.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.36.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.37.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.38.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.39.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.4.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.40.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.41.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.42.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.43.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.44.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.45.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.46.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.47.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.48.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.49.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.5.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.50.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.51.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.52.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.53.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.54.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.55.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.56.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.57.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.58.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.59.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.6.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.60.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.61.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.62.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.63.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.7.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.8.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.6.mlp.experts.9.down_proj.weight": "model-00043-of-00066.safetensors", + "model.layers.7.mlp.experts.0.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.1.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.10.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.11.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.12.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.13.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.14.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.15.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.16.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.17.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.18.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.19.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.2.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.20.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.21.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.22.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.23.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.24.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.25.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.26.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.27.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.28.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.29.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.3.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.30.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.31.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.32.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.33.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.34.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.35.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.36.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.37.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.38.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.39.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.4.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.40.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.41.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.42.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.43.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.44.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.45.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.46.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.47.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.48.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.49.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.5.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.50.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.51.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.52.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.53.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.54.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.55.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.56.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.57.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.58.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.59.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.6.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.60.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.61.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.62.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.63.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.7.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.8.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.7.mlp.experts.9.down_proj.weight": "model-00044-of-00066.safetensors", + "model.layers.8.mlp.experts.0.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.1.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.10.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.11.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.12.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.13.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.14.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.15.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.16.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.17.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.18.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.19.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.2.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.20.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.21.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.22.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.23.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.24.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.25.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.26.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.27.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.28.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.29.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.3.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.30.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.31.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.32.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.33.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.34.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.35.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.36.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.37.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.38.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.39.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.4.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.40.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.41.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.42.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.43.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.44.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.45.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.46.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.47.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.48.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.49.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.5.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.50.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.51.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.52.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.53.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.54.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.55.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.56.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.57.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.58.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.59.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.6.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.60.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.61.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.62.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.63.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.7.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.8.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.8.mlp.experts.9.down_proj.weight": "model-00045-of-00066.safetensors", + "model.layers.9.mlp.experts.0.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.1.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.10.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.11.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.12.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.13.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.14.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.15.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.16.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.17.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.18.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.19.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.2.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.20.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.21.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.22.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.23.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.24.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.25.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.26.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.27.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.28.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.29.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.3.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.30.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.31.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.32.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.33.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.34.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.35.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.36.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.37.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.38.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.39.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.4.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.40.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.41.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.42.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.43.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.44.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.45.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.46.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.47.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.48.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.49.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.5.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.50.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.51.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.52.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.53.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.54.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.55.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.56.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.57.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.58.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.59.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.6.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.60.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.61.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.62.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.63.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.7.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.8.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.9.mlp.experts.9.down_proj.weight": "model-00046-of-00066.safetensors", + "model.layers.10.mlp.experts.0.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.1.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.10.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.11.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.12.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.13.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.14.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.15.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.16.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.17.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.18.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.19.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.2.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.20.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.21.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.22.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.23.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.24.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.25.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.26.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.27.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.28.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.29.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.3.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.30.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.31.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.32.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.33.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.34.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.35.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.36.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.37.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.38.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.39.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.4.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.40.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.41.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.42.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.43.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.44.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.45.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.46.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.47.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.48.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.49.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.5.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.50.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.51.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.52.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.53.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.54.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.55.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.56.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.57.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.58.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.59.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.6.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.60.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.61.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.62.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.63.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.7.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.8.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.10.mlp.experts.9.down_proj.weight": "model-00047-of-00066.safetensors", + "model.layers.11.mlp.experts.0.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.1.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.10.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.11.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.12.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.13.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.14.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.15.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.16.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.17.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.18.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.19.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.2.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.20.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.21.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.22.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.23.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.24.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.25.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.26.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.27.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.28.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.29.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.3.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.30.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.31.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.32.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.33.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.34.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.35.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.36.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.37.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.38.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.39.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.4.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.40.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.41.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.42.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.43.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.44.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.45.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.46.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.47.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.48.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.49.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.5.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.50.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.51.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.52.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.53.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.54.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.55.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.56.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.57.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.58.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.59.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.6.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.60.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.61.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.62.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.63.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.7.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.8.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.11.mlp.experts.9.down_proj.weight": "model-00048-of-00066.safetensors", + "model.layers.12.mlp.experts.0.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.1.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.10.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.11.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.12.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.13.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.14.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.15.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.16.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.17.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.18.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.19.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.2.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.20.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.21.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.22.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.23.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.24.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.25.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.26.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.27.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.28.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.29.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.3.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.30.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.31.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.32.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.33.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.34.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.35.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.36.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.37.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.38.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.39.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.4.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.40.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.41.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.42.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.43.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.44.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.45.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.46.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.47.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.48.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.49.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.5.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.50.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.51.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.52.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.53.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.54.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.55.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.56.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.57.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.58.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.59.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.6.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.60.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.61.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.62.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.63.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.7.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.8.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.12.mlp.experts.9.down_proj.weight": "model-00049-of-00066.safetensors", + "model.layers.13.mlp.experts.0.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.1.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.10.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.11.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.12.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.13.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.14.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.15.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.16.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.17.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.18.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.19.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.2.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.20.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.21.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.22.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.23.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.24.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.25.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.26.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.27.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.28.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.29.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.3.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.30.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.31.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.32.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.33.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.34.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.35.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.36.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.37.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.38.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.39.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.4.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.40.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.41.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.42.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.43.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.44.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.45.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.46.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.47.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.48.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.49.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.5.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.50.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.51.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.52.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.53.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.54.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.55.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.56.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.57.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.58.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.59.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.6.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.60.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.61.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.62.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.63.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.7.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.8.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.13.mlp.experts.9.down_proj.weight": "model-00050-of-00066.safetensors", + "model.layers.14.mlp.experts.0.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.1.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.10.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.11.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.12.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.13.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.14.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.15.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.16.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.17.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.18.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.19.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.2.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.20.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.21.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.22.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.23.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.24.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.25.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.26.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.27.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.28.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.29.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.3.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.30.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.31.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.32.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.33.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.34.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.35.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.36.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.37.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.38.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.39.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.4.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.40.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.41.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.42.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.43.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.44.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.45.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.46.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.47.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.48.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.49.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.5.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.50.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.51.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.52.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.53.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.54.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.55.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.56.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.57.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.58.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.59.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.6.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.60.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.61.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.62.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.63.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.7.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.8.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.14.mlp.experts.9.down_proj.weight": "model-00051-of-00066.safetensors", + "model.layers.15.mlp.experts.0.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.1.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.10.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.11.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.12.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.13.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.14.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.15.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.16.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.17.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.18.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.19.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.2.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.20.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.21.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.22.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.23.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.24.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.25.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.26.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.27.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.28.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.29.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.3.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.30.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.31.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.32.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.33.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.34.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.35.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.36.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.37.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.38.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.39.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.4.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.40.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.41.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.42.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.43.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.44.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.45.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.46.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.47.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.48.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.49.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.5.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.50.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.51.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.52.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.53.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.54.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.55.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.56.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.57.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.58.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.59.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.6.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.60.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.61.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.62.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.63.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.7.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.8.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.15.mlp.experts.9.down_proj.weight": "model-00052-of-00066.safetensors", + "model.layers.16.mlp.experts.0.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.1.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.10.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.11.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.12.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.13.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.14.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.15.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.16.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.17.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.18.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.19.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.2.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.20.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.21.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.22.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.23.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.24.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.25.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.26.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.27.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.28.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.29.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.3.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.30.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.31.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.32.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.33.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.34.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.35.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.36.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.37.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.38.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.39.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.4.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.40.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.41.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.42.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.43.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.44.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.45.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.46.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.47.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.48.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.49.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.5.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.50.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.51.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.52.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.53.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.54.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.55.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.56.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.57.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.58.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.59.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.6.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.60.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.61.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.62.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.63.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.7.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.8.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.16.mlp.experts.9.down_proj.weight": "model-00053-of-00066.safetensors", + "model.layers.17.mlp.experts.0.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.1.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.10.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.11.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.12.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.13.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.14.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.15.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.16.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.17.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.18.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.19.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.2.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.20.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.21.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.22.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.23.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.24.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.25.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.26.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.27.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.28.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.29.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.3.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.30.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.31.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.32.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.33.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.34.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.35.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.36.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.37.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.38.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.39.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.4.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.40.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.41.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.42.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.43.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.44.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.45.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.46.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.47.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.48.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.49.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.5.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.50.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.51.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.52.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.53.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.54.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.55.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.56.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.57.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.58.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.59.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.6.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.60.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.61.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.62.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.63.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.7.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.8.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.17.mlp.experts.9.down_proj.weight": "model-00054-of-00066.safetensors", + "model.layers.18.mlp.experts.0.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.1.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.10.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.11.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.12.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.13.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.14.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.15.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.16.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.17.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.18.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.19.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.2.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.20.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.21.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.22.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.23.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.24.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.25.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.26.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.27.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.28.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.29.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.3.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.30.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.31.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.32.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.33.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.34.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.35.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.36.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.37.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.38.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.39.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.4.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.40.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.41.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.42.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.43.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.44.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.45.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.46.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.47.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.48.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.49.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.5.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.50.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.51.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.52.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.53.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.54.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.55.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.56.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.57.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.58.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.59.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.6.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.60.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.61.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.62.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.63.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.7.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.8.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.18.mlp.experts.9.down_proj.weight": "model-00055-of-00066.safetensors", + "model.layers.19.mlp.experts.0.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.1.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.10.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.11.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.12.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.13.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.14.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.15.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.16.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.17.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.18.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.19.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.2.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.20.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.21.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.22.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.23.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.24.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.25.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.26.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.27.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.28.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.29.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.3.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.30.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.31.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.32.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.33.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.34.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.35.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.36.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.37.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.38.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.39.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.4.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.40.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.41.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.42.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.43.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.44.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.45.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.46.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.47.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.48.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.49.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.5.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.50.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.51.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.52.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.53.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.54.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.55.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.56.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.57.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.58.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.59.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.6.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.60.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.61.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.62.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.63.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.7.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.8.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.19.mlp.experts.9.down_proj.weight": "model-00056-of-00066.safetensors", + "model.layers.20.mlp.experts.0.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.1.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.10.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.11.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.12.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.13.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.14.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.15.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.16.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.17.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.18.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.19.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.2.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.20.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.21.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.22.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.23.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.24.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.25.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.26.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.27.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.28.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.29.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.3.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.30.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.31.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.32.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.33.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.34.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.35.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.36.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.37.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.38.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.39.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.4.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.40.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.41.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.42.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.43.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.44.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.45.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.46.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.47.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.48.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.49.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.5.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.50.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.51.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.52.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.53.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.54.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.55.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.56.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.57.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.58.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.59.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.6.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.60.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.61.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.62.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.63.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.7.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.8.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.20.mlp.experts.9.down_proj.weight": "model-00057-of-00066.safetensors", + "model.layers.21.mlp.experts.0.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.1.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.10.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.11.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.12.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.13.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.14.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.15.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.16.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.17.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.18.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.19.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.2.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.20.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.21.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.22.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.23.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.24.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.25.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.26.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.27.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.28.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.29.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.3.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.30.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.31.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.32.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.33.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.34.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.35.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.36.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.37.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.38.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.39.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.4.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.40.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.41.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.42.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.43.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.44.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.45.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.46.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.47.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.48.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.49.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.5.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.50.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.51.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.52.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.53.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.54.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.55.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.56.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.57.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.58.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.59.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.6.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.60.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.61.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.62.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.63.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.7.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.8.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.21.mlp.experts.9.down_proj.weight": "model-00058-of-00066.safetensors", + "model.layers.22.mlp.experts.0.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.1.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.10.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.11.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.12.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.13.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.14.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.15.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.16.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.17.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.18.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.19.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.2.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.20.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.21.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.22.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.23.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.24.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.25.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.26.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.27.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.28.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.29.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.3.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.30.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.31.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.32.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.33.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.34.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.35.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.36.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.37.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.38.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.39.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.4.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.40.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.41.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.42.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.43.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.44.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.45.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.46.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.47.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.48.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.49.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.5.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.50.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.51.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.52.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.53.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.54.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.55.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.56.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.57.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.58.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.59.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.6.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.60.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.61.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.62.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.63.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.7.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.8.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.22.mlp.experts.9.down_proj.weight": "model-00059-of-00066.safetensors", + "model.layers.23.mlp.experts.0.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.1.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.10.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.11.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.12.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.13.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.14.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.15.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.16.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.17.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.18.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.19.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.2.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.20.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.21.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.22.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.23.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.24.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.25.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.26.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.27.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.28.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.29.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.3.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.30.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.31.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.32.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.33.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.34.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.35.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.36.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.37.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.38.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.39.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.4.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.40.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.41.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.42.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.43.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.44.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.45.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.46.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.47.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.48.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.49.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.5.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.50.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.51.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.52.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.53.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.54.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.55.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.56.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.57.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.58.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.59.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.6.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.60.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.61.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.62.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.63.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.7.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.8.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.23.mlp.experts.9.down_proj.weight": "model-00060-of-00066.safetensors", + "model.layers.24.mlp.experts.0.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.1.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.10.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.11.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.12.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.13.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.14.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.15.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.16.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.17.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.18.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.19.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.2.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.20.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.21.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.22.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.23.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.24.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.25.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.26.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.27.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.28.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.29.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.3.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.30.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.31.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.32.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.33.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.34.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.35.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.36.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.37.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.38.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.39.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.4.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.40.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.41.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.42.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.43.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.44.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.45.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.46.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.47.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.48.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.49.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.5.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.50.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.51.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.52.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.53.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.54.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.55.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.56.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.57.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.58.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.59.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.6.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.60.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.61.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.62.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.63.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.7.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.8.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.24.mlp.experts.9.down_proj.weight": "model-00061-of-00066.safetensors", + "model.layers.25.mlp.experts.0.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.1.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.10.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.11.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.12.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.13.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.14.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.15.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.16.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.17.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.18.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.19.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.2.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.20.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.21.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.22.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.23.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.24.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.25.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.26.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.27.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.28.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.29.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.3.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.30.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.31.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.32.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.33.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.34.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.35.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.36.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.37.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.38.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.39.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.4.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.40.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.41.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.42.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.43.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.44.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.45.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.46.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.47.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.48.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.49.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.5.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.50.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.51.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.52.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.53.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.54.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.55.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.56.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.57.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.58.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.59.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.6.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.60.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.61.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.62.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.63.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.7.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.8.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.25.mlp.experts.9.down_proj.weight": "model-00062-of-00066.safetensors", + "model.layers.26.mlp.experts.0.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.1.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.10.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.11.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.12.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.13.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.14.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.15.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.16.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.17.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.18.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.19.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.2.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.20.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.21.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.22.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.23.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.24.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.25.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.26.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.27.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.28.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.29.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.3.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.30.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.31.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.32.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.33.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.34.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.35.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.36.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.37.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.38.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.39.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.4.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.40.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.41.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.42.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.43.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.44.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.45.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.46.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.47.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.48.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.49.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.5.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.50.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.51.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.52.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.53.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.54.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.55.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.56.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.57.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.58.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.59.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.6.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.60.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.61.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.62.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.63.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.7.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.8.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.26.mlp.experts.9.down_proj.weight": "model-00063-of-00066.safetensors", + "model.layers.27.mlp.experts.0.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.1.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.10.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.11.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.12.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.13.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.14.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.15.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.16.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.17.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.18.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.19.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.2.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.20.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.21.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.22.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.23.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.24.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.25.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.26.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.27.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.28.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.29.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.3.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.30.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.31.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.32.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.33.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.34.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.35.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.36.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.37.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.38.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.39.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.4.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.40.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.41.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.42.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.43.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.44.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.45.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.46.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.47.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.48.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.49.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.5.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.50.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.51.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.52.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.53.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.54.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.55.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.56.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.57.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.58.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.59.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.6.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.60.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.61.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.62.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.63.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.7.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.8.down_proj.weight": "model-00064-of-00066.safetensors", + "model.layers.27.mlp.experts.9.down_proj.weight": "model-00064-of-00066.safetensors", + "model.norm.weight": "model-00065-of-00066.safetensors", + "lm_head.weight": "model-00066-of-00066.safetensors" } -} +} \ No newline at end of file