AvivBick committed on
Commit ec9cf81 · verified · 1 Parent(s): 4535efc

Update config.json

Files changed (1)
  1. config.json +24 -51
config.json CHANGED
@@ -1,52 +1,25 @@
  {
- "name":"LayeredMambaLM",
- "input":{
- "vocab_size":128256,
- "tie_embeddings":true,
- "pad_vocab_size_multiple":8,
- "lm_head_bias":false
- },
- "MixerModel":{
- "name":"MixerModel",
- "input":{
- "d_model":2048,
- "n_layer":16,
- "lm_head_prenorm":"rms"
- },
- "Blocks":[
- {
- "name":"LlamaBlock",
- "n_layers":16,
- "input":{
- "resid_dropout":0.0,
- "mlp_intermediate_size":8192,
- "mlp_act_fn":"silu"
- },
- "Layer":{
- "name":"DiscreteMamba2",
- "input":{
- "d_state":64,
- "n_qk_heads":32,
- "n_v_heads":32,
- "expand":1,
- "chunk_size":128,
- "activation":"identity",
- "use_ref_impl":false,
- "bias":false,
- "norm_cls":"none",
- "initializer":{
- "a_log":"default",
- "x":"default",
- "B":"default",
- "C":"default",
- "D":"default",
- "z":"identity",
- "out":"default",
- "convolution":"identity"
- }
- }
- }
- }
- ]
- }
- }
+ "model_type": "llamba",
+ "vocab_size": 128256,
+ "tie_embeddings": true,
+ "pad_vocab_size_multiple": 8,
+ "lm_head_bias": false,
+ "d_model": 2048,
+ "n_layer": 32,
+ "resid_dropout": 0.0,
+ "norm_epsilon": 1e-5,
+ "mlp_cfg": {
+ "intermediate_size": 14336,
+ "bias": false,
+ "act_fn": "silu"
+ },
+ "ssm_cfg": {
+ "d_state": 64,
+ "n_v_heads": 32,
+ "n_qk_heads": 32,
+ "expand": 1,
+ "chunk_size": 128,
+ "activation": "identity",
+ "bias": false
+ }
+ }
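For reference, here is a minimal Python sketch of how the new flattened config.json above might be parsed. The field names mirror the committed file, but the dataclasses and the load_config helper are illustrative assumptions, not the repository's actual loading code.

```python
import json
from dataclasses import dataclass


# Hypothetical containers mirroring the new flattened schema in config.json.
@dataclass
class MLPConfig:
    intermediate_size: int
    bias: bool
    act_fn: str


@dataclass
class SSMConfig:
    d_state: int
    n_v_heads: int
    n_qk_heads: int
    expand: int
    chunk_size: int
    activation: str
    bias: bool


@dataclass
class LlambaConfig:
    model_type: str
    vocab_size: int
    tie_embeddings: bool
    pad_vocab_size_multiple: int
    lm_head_bias: bool
    d_model: int
    n_layer: int
    resid_dropout: float
    norm_epsilon: float
    mlp_cfg: MLPConfig
    ssm_cfg: SSMConfig


def load_config(path: str = "config.json") -> LlambaConfig:
    # Read the flat JSON and wrap the two nested sub-configs in dataclasses.
    with open(path) as f:
        raw = json.load(f)
    raw["mlp_cfg"] = MLPConfig(**raw["mlp_cfg"])
    raw["ssm_cfg"] = SSMConfig(**raw["ssm_cfg"])
    return LlambaConfig(**raw)


if __name__ == "__main__":
    cfg = load_config()
    print(cfg.model_type, cfg.d_model, cfg.n_layer)
```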