pglo committed
Commit d0813c9 · verified · 1 parent: 990d0cd

Upload folder using huggingface_hub
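This commit message appears to be the default written by huggingface_hub's upload_folder. A minimal sketch of the kind of call that produces such a commit; the local folder path and repo id are placeholders, since neither appears on this page:

from huggingface_hub import HfApi

api = HfApi()
# Uploads every file in the local folder (config.json, model.safetensors, ...)
# as a single commit; with no explicit commit_message the library falls back to
# "Upload folder using huggingface_hub".
api.upload_folder(
    folder_path="./zamba2-1.2b-instruct",  # placeholder local path
    repo_id="pglo/<repo-name>",            # placeholder repo id
    repo_type="model",
)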

Files changed (2):
  1. config.json +60 -57
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,82 +1,85 @@
  {
- "_name_or_path": "Zyphra/Zamba2-1.2B-instruct",
+ "adapter_rank": 128,
  "add_bias_linear": false,
- "architectures": [
- "Zamba2ForCausalLM"
- ],
  "attention_dropout": 0.0,
+ "attention_head_dim": 128,
+ "attention_hidden_size": 4096,
  "bos_token_id": 1,
- "conv_dimension": 4,
+ "chunk_size": 256,
  "eos_token_id": 2,
- "expansion_factor": 2,
  "ffn_hidden_size": 8192,
- "ft_lora": false,
- "gated_linear_unit": true,
+ "hidden_act": "gelu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
+ "intermediate_size": 8192,
  "kv_channels": 64,
  "layers_block_type": [
- "m",
- "m",
- "m",
- "m",
- "m",
- "g",
- "m",
- "m",
- "m",
- "m",
- "m",
- "g",
- "m",
- "m",
- "m",
- "m",
- "m",
- "g",
- "m",
- "m",
- "m",
- "m",
- "m",
- "g",
- "m",
- "m",
- "m",
- "m",
- "m",
- "g",
- "m",
- "m",
- "m",
- "m",
- "m",
- "g",
- "m",
- "m"
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "hybrid",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "hybrid",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "hybrid",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "hybrid",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "hybrid",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "hybrid",
+ "mamba",
+ "mamba"
  ],
- "lora_rank": 128,
- "lora_rank_mamba": 128,
+ "mamba_d_conv": 4,
+ "mamba_d_state": 128,
+ "mamba_expand": 2,
  "mamba_headdim": 64,
+ "mamba_ngroups": 1,
  "max_position_embeddings": 4096,
  "model_type": "zamba2",
+ "n_mamba_heads": 64,
  "num_attention_heads": 32,
  "num_hidden_layers": 38,
  "num_key_value_heads": 32,
  "num_logits_to_keep": 1,
  "num_mem_blocks": 1,
  "num_query_groups": 32,
- "pad_token_id": 2,
+ "pad_token_id": 0,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000,
- "sliding_window": null,
- "state_size": 128,
- "torch_dtype": "float32",
- "transformers_version": "4.43.0.dev0",
- "use_cache": false,
- "use_mamba_kernels": true,
+ "time_step_floor": 0.0001,
+ "time_step_limit": null,
+ "time_step_max": 0.1,
+ "time_step_min": 0.001,
+ "transformers_version": "4.49.0.dev0",
+ "use_cache": true,
+ "use_conv_bias": true,
+ "use_long_context": false,
  "use_mem_rope": true,
- "use_shared_attention_lora": true,
- "use_shared_block_lora": true,
+ "use_shared_attention_adapter": true,
+ "use_shared_mlp_adapter": true,
  "vocab_size": 32000
  }
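The rewrite above renames the layer-type codes ("m"/"g" become "mamba"/"hybrid") and the LoRA-style options (lora_rank becomes adapter_rank, the use_shared_*_lora flags become use_shared_*_adapter), in line with the field names used by the Zamba2 config class in recent transformers releases (the new transformers_version is 4.49.0.dev0). A minimal sketch for inspecting the updated config, assuming a transformers build with Zamba2 support and using the Zyphra/Zamba2-1.2B-instruct id from the removed _name_or_path field as a stand-in for this repo:

from transformers import AutoConfig

# Stand-in repo id taken from the removed "_name_or_path"; substitute this repo's id.
config = AutoConfig.from_pretrained("Zyphra/Zamba2-1.2B-instruct")

print(config.model_type)              # "zamba2"
print(config.layers_block_type[:6])   # ['mamba', 'mamba', 'mamba', 'mamba', 'mamba', 'hybrid']
print(config.adapter_rank)            # 128, replaces the old "lora_rank"
print(config.use_shared_mlp_adapter)  # True, the analogue of the old "use_shared_block_lora"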
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fbf2d9f677192fd5869196e5e6f20c89c924d4921ac5277d7e18eeb10d6e6c30
- size 2561247992
+ oid sha256:b442e3dd5477526c396465a1342c23ce7468d4c60e973224691e79af81d65328
+ size 4860300264
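The new LFS pointer also records a much larger weights file: 4,860,300,264 bytes is roughly what ~1.2 billion parameters occupy at 4 bytes each (float32), while the previous 2,561,247,992 bytes is in the range of the same model at about 2 bytes per parameter (bf16/fp16). A minimal sketch that reads the safetensors header (an 8-byte little-endian length prefix followed by a JSON table of tensor dtypes and shapes) to check the dtypes and parameter count of the downloaded file; the local path is assumed:

import json
import struct

def summarize(path="model.safetensors"):
    with open(path, "rb") as f:
        header_len = struct.unpack("<Q", f.read(8))[0]  # 8-byte little-endian header length
        header = json.loads(f.read(header_len))         # name -> {"dtype", "shape", "data_offsets"}
    total = 0
    by_dtype = {}
    for name, meta in header.items():
        if name == "__metadata__":
            continue
        numel = 1
        for dim in meta["shape"]:
            numel *= dim
        total += numel
        by_dtype[meta["dtype"]] = by_dtype.get(meta["dtype"], 0) + numel
    print(f"total parameters: {total:,}")
    print("parameters by dtype:", by_dtype)

summarize()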