Text Generation
Transformers
Safetensors
PyTorch
English
nemotron-nas
llama-3
llama
nvidia-nemotron
nemotron-ultra
fine-tuned
conversational-ai
large-language-model
huggingface
open-source-llm
generative-ai
nvidia
meta-llama
instruct-tuning
chat-model
llm
artificial-intelligence
deep-learning
tensorrt-llm
gpu-optimized
multilingual
instruction-following
conversational
custom_code
Uploading optimized model files
configuration_decilm.py +65 -0
configuration_decilm.py
ADDED
@@ -0,0 +1,65 @@
+# coding=utf-8
+# Copyright 2024 Nvidia Corporation. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import dataclasses
+import warnings
+from typing import Dict, Any
+
+from transformers.utils import is_flash_attn_2_available
+
+from .block_config import BlockConfig
+from .transformers_4_44_2__configuration_llama import LlamaConfig
+from .transformers_4_44_2__modeling_rope_utils import \
+    rope_config_validation  # fake import to make AutoConfig infer the dependency
+
+rope_config_validation  # this line is here to make sure that auto-formatting doesn't remove the import
+
+
+class DeciLMConfig(LlamaConfig):
+    model_type = "nemotron-nas"
+
+    def __init__(
+            self,
+            block_configs: list[dict] | list[BlockConfig] = None,
+            **kwargs,
+    ):
+        attn_implementation = kwargs.pop("attn_implementation", None)
+        if attn_implementation is None and is_flash_attn_2_available():
+            attn_implementation = "flash_attention_2"
+
+        if block_configs is not None:
+            if isinstance(block_configs[0], dict):
+                block_configs = [BlockConfig(**conf) for conf in block_configs]
+
+            using_unshifted_sink = any([block_config.attention.unshifted_sink for block_config in block_configs])
+            if using_unshifted_sink and attn_implementation != "eager":
+                warnings.warn("Forcing attn_implementation='eager' since some attention layers use unshifted sink")
+                attn_implementation = "eager"
+
+        super().__init__(attn_implementation=attn_implementation, **kwargs)
+
+        self.intermediate_size = None
+        self.num_key_value_heads = None
+
+        if block_configs is not None:
+            assert len(block_configs) == self.num_hidden_layers
+
+        self.block_configs: list[BlockConfig] = block_configs
+
+    def to_dict(self) -> Dict[str, Any]:
+        self_dict = super().to_dict()
+        if self.block_configs is not None:
+            self_dict["block_configs"] = [dataclasses.asdict(conf) for conf in self.block_configs]
+        return self_dict
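
For orientation, a minimal usage sketch: because model_type = "nemotron-nas" is not a built-in transformers architecture (the repo is tagged custom_code), loading the config goes through AutoConfig with trust_remote_code=True so that this configuration_decilm.py is imported from the model repository. The repo id below is an illustrative placeholder, not taken from this commit.

from transformers import AutoConfig

# Placeholder repo id for illustration; substitute the repository this
# commit actually belongs to.
config = AutoConfig.from_pretrained(
    "nvidia/some-nemotron-nas-checkpoint",
    trust_remote_code=True,  # needed to execute the repo's custom config code
)

print(config.model_type)          # "nemotron-nas"
print(len(config.block_configs))  # one BlockConfig per hidden layer

# to_dict() converts each BlockConfig dataclass back into a plain dict,
# so the heterogeneous per-layer structure survives a config.json
# save/load round trip.
as_dict = config.to_dict()
assert isinstance(as_dict["block_configs"][0], dict)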