lambda-technologies-limited committed on
Commit
28133c4
·
verified ·
1 Parent(s): 9ea8004

Uploading optimized model files

Browse files
Files changed (1) hide show
  1. configuration_decilm.py +65 -0
configuration_decilm.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 Nvidia Corporation. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import dataclasses
17
+ import warnings
18
+ from typing import Dict, Any
19
+
20
+ from transformers.utils import is_flash_attn_2_available
21
+
22
+ from .block_config import BlockConfig
23
+ from .transformers_4_44_2__configuration_llama import LlamaConfig
24
+ from .transformers_4_44_2__modeling_rope_utils import \
25
+ rope_config_validation # fake import to make AutoConfig infer the dependency
26
+
27
+ rope_config_validation # this line is here to make sure that auto-formatting doesn't remove the import
28
+
29
+
30
class DeciLMConfig(LlamaConfig):
    """Llama-derived configuration that carries an optional list of block configs.

    On top of whatever ``LlamaConfig`` accepts, this config stores
    ``block_configs``: one ``BlockConfig`` per hidden layer (the length is
    validated against ``num_hidden_layers``). It also picks the attention
    implementation passed to the parent constructor.
    """

    model_type = "nemotron-nas"

    def __init__(
        self,
        block_configs: list[dict] | list[BlockConfig] | None = None,
        **kwargs,
    ):
        """Build the config and normalize ``block_configs``.

        Args:
            block_configs: Per-layer block descriptions, given either as
                ``BlockConfig`` instances or as dicts of ``BlockConfig``
                keyword arguments (dicts and instances may be mixed). ``None``
                leaves the config without block configs.
            **kwargs: Forwarded to ``LlamaConfig.__init__``. The optional
                ``attn_implementation`` entry is consumed here first.

        Raises:
            ValueError: If ``block_configs`` is given and its length differs
                from ``num_hidden_layers``.
        """
        # Default to flash attention only when the caller expressed no preference.
        attn_implementation = kwargs.pop("attn_implementation", None)
        if attn_implementation is None and is_flash_attn_2_available():
            attn_implementation = "flash_attention_2"

        if block_configs is not None:
            # Convert element by element so empty lists and mixed
            # dict/BlockConfig lists are handled instead of crashing on [0].
            block_configs = [
                BlockConfig(**conf) if isinstance(conf, dict) else conf
                for conf in block_configs
            ]

            using_unshifted_sink = any(conf.attention.unshifted_sink for conf in block_configs)
            if using_unshifted_sink and attn_implementation != "eager":
                warnings.warn("Forcing attn_implementation='eager' since some attention layers use unshifted sink")
                attn_implementation = "eager"

        super().__init__(attn_implementation=attn_implementation, **kwargs)

        # Cleared after the parent constructor has set them.
        # NOTE(review): presumably these values are defined per block inside
        # block_configs instead — confirm against BlockConfig.
        self.intermediate_size = None
        self.num_key_value_heads = None

        # Explicit raise instead of `assert`, which is stripped under `python -O`.
        if block_configs is not None and len(block_configs) != self.num_hidden_layers:
            raise ValueError(
                f"Expected {self.num_hidden_layers} block configs (one per hidden layer), "
                f"got {len(block_configs)}"
            )

        self.block_configs: list[BlockConfig] | None = block_configs

    def to_dict(self) -> Dict[str, Any]:
        """Return the parent's dict form with ``block_configs`` as plain dicts.

        When ``block_configs`` is set, each entry is converted with
        ``dataclasses.asdict`` and stored under the ``"block_configs"`` key;
        otherwise the parent's dict is returned as produced by ``super()``.
        """
        self_dict = super().to_dict()
        if self.block_configs is not None:
            self_dict["block_configs"] = [dataclasses.asdict(conf) for conf in self.block_configs]
        return self_dict