AvivBick committed on
Commit
4535efc
·
verified ·
1 Parent(s): 23e6352

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +52 -1
config.json CHANGED
@@ -1 +1,52 @@
1
- {"name": "LayeredMambaLM", "input": {"vocab_size": 128256, "tie_embeddings": true, "pad_vocab_size_multiple": 8, "lm_head_bias": false}, "MixerModel": {"name": "MixerModel", "input": {"d_model": 2048, "n_layer": 16, "lm_head_prenorm": "rms"}, "Blocks": [{"name": "LlamaBlock", "n_layers": 16, "input": {"resid_dropout": 0.0, "mlp_intermediate_size": 8192, "mlp_act_fn": "silu"}, "Layer": {"name": "DiscreteMamba2", "input": {"d_state": 64, "n_qk_heads": 32, "n_v_heads": 32, "expand": 1, "chunk_size": 128, "activation": "identity", "use_ref_impl": false, "bias": false, "norm_cls": "none", "initializer": {"a_log": "default", "x": "default", "B": "default", "C": "default", "D": "default", "z": "identity", "out": "default", "convolution": "identity"}}}}]}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name":"LayeredMambaLM",
3
+ "input":{
4
+ "vocab_size":128256,
5
+ "tie_embeddings":true,
6
+ "pad_vocab_size_multiple":8,
7
+ "lm_head_bias":false
8
+ },
9
+ "MixerModel":{
10
+ "name":"MixerModel",
11
+ "input":{
12
+ "d_model":2048,
13
+ "n_layer":16,
14
+ "lm_head_prenorm":"rms"
15
+ },
16
+ "Blocks":[
17
+ {
18
+ "name":"LlamaBlock",
19
+ "n_layers":16,
20
+ "input":{
21
+ "resid_dropout":0.0,
22
+ "mlp_intermediate_size":8192,
23
+ "mlp_act_fn":"silu"
24
+ },
25
+ "Layer":{
26
+ "name":"DiscreteMamba2",
27
+ "input":{
28
+ "d_state":64,
29
+ "n_qk_heads":32,
30
+ "n_v_heads":32,
31
+ "expand":1,
32
+ "chunk_size":128,
33
+ "activation":"identity",
34
+ "use_ref_impl":false,
35
+ "bias":false,
36
+ "norm_cls":"none",
37
+ "initializer":{
38
+ "a_log":"default",
39
+ "x":"default",
40
+ "B":"default",
41
+ "C":"default",
42
+ "D":"default",
43
+ "z":"identity",
44
+ "out":"default",
45
+ "convolution":"identity"
46
+ }
47
+ }
48
+ }
49
+ }
50
+ ]
51
+ }
52
+ }