zhengwenzhen committed on
Commit 1c75282 · verified · 1 Parent(s): 2dd8a19

Upload configuration_step1.py with huggingface_hub
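The commit message indicates the file was pushed with the huggingface_hub client. A minimal sketch of such an upload, assuming HfApi.upload_file and a placeholder repo id (neither the repo id nor the token handling is taken from this commit):

    from huggingface_hub import HfApi

    api = HfApi()  # authentication is assumed to come from the locally cached HF token
    api.upload_file(
        path_or_fileobj="configuration_step1.py",  # local file to push
        path_in_repo="configuration_step1.py",     # destination path inside the repo
        repo_id="your-username/your-model",        # placeholder repo id, not from this commit
        commit_message="Upload configuration_step1.py with huggingface_hub",
    )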

Files changed (1)
  1. configuration_step1.py +41 -0
configuration_step1.py ADDED
@@ -0,0 +1,41 @@
+ from typing import Optional, List, Any, Dict
+ from transformers.configuration_utils import PretrainedConfig
+
+
+
+ class Step1Config(PretrainedConfig):
+     model_type = "step1"
+     keys_to_ignore_at_inference = ["past_key_values"]
+
+     def __init__(
+         self,
+         hidden_size: int = 5120,
+         intermediate_size: int = 13312,
+         num_attention_heads: int = 40,
+         num_attention_groups: int = 8,
+         num_hidden_layers: int = 48,
+         max_seq_len: int = 4096,
+         vocab_size: int = 65536,
+         rms_norm_eps: float = 1e-5,
+         bos_token_id: int = 1,
+         eos_token_id: int = 3,
+         pad_token_id: int = 0,
+         **kwargs,
+     ) -> None:
+         self.hidden_size = hidden_size
+         self.intermediate_size = intermediate_size
+         self.num_attention_heads = num_attention_heads
+         self.num_attention_groups = num_attention_groups
+         self.num_hidden_layers = num_hidden_layers
+         self.max_seq_len = max_seq_len
+         self.vocab_size = vocab_size
+         self.rms_norm_eps = rms_norm_eps
+         super().__init__(
+             bos_token_id=bos_token_id,
+             pad_token_id=pad_token_id,
+             eos_token_id=eos_token_id,
+             **kwargs
+         )
+
+
+ __all__ = ["Step1Config"]