AndreaUnibo committed
Commit e7694eb · verified · 1 Parent(s): 09617e2

Training in progress, step 1000

adapter_config.json CHANGED
@@ -20,45 +20,51 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "model.layers.21.self_attention.kv_proj",
-    "model.lstm_gates.1.forget_gate",
-    "model.layers.23.self_attention.kv_proj",
-    "model.layers.16.self_attention.kv_proj",
-    "model.layers.13.self_attention.kv_proj",
-    "model.layers.17.self_attention.kv_proj",
-    "model.layers.1.mlp.router.layer",
-    "model.layers.19.self_attention.kv_proj",
-    "model.lstm_gates.0.forget_gate",
-    "model.layers.18.mlp.router.layer",
-    "model.layers.1.mlp.router.perturbation",
-    "model.layers.20.self_attention.kv_proj",
-    "model.lstm_gates.0.input_gate",
-    "model.layers.10.self_attention.kv_proj",
-    "model.layers.1.self_attention.kv_proj",
-    "model.layers.22.self_attention.kv_proj",
-    "model.layers.14.self_attention.kv_proj",
-    "model.layers.18.self_attention.experts.router.layer",
-    "model.lstm_gates.1.input_gate",
-    "model.lstm_gates.0.output_gate",
-    "model.layers.6.self_attention.kv_proj",
+    "model.layers.19.self_attention.experts.router.layer",
     "model.layers.2.self_attention.kv_proj",
+    "model.layers.19.mlp.router.layer",
+    "model.lstm_gates.0.output_gate",
+    "model.layers.8.self_attention.kv_proj",
     "model.lstm_gates.1.transform",
+    "model.lstm_gates.2.output_gate",
+    "model.lstm_gates.2.forget_gate",
+    "model.lstm_gates.0.forget_gate",
+    "model.lstm_gates.1.output_gate",
+    "model.lstm_gates.2.transform",
+    "model.lstm_gates.0.transform",
+    "model.layers.17.self_attention.kv_proj",
+    "model.layers.20.self_attention.experts.router.layer",
+    "model.layers.16.self_attention.kv_proj",
+    "model.layers.1.self_attention.experts.router.layer",
+    "model.layers.18.self_attention.kv_proj",
     "model.layers.0.self_attention.kv_proj",
+    "model.layers.14.self_attention.kv_proj",
+    "model.lstm_gates.1.input_gate",
+    "model.layers.7.self_attention.kv_proj",
     "model.layers.11.self_attention.kv_proj",
-    "model.lstm_gates.1.output_gate",
-    "model.layers.3.self_attention.kv_proj",
+    "model.layers.10.self_attention.kv_proj",
+    "model.layers.22.self_attention.kv_proj",
     "model.layers.9.self_attention.kv_proj",
+    "model.lstm_gates.0.input_gate",
+    "model.layers.21.self_attention.kv_proj",
+    "model.layers.13.self_attention.kv_proj",
     "model.layers.15.self_attention.kv_proj",
-    "model.layers.19.mlp.router.layer",
-    "model.layers.4.self_attention.kv_proj",
-    "model.layers.18.self_attention.kv_proj",
-    "model.layers.19.self_attention.experts.router.layer",
-    "model.layers.7.self_attention.kv_proj",
+    "model.layers.23.self_attention.kv_proj",
+    "model.layers.19.self_attention.kv_proj",
+    "model.lstm_gates.2.input_gate",
+    "model.layers.1.mlp.router.perturbation",
+    "model.layers.1.mlp.router.layer",
     "model.layers.5.self_attention.kv_proj",
-    "model.layers.1.self_attention.experts.router.layer",
+    "model.layers.6.self_attention.kv_proj",
+    "model.lstm_gates.1.forget_gate",
+    "model.layers.4.self_attention.kv_proj",
+    "model.layers.1.self_attention.kv_proj",
     "model.layers.12.self_attention.kv_proj",
-    "model.lstm_gates.0.transform",
-    "model.layers.8.self_attention.kv_proj"
+    "model.layers.20.self_attention.kv_proj",
+    "model.layers.21.self_attention.experts.router.layer",
+    "model.layers.3.self_attention.kv_proj",
+    "model.layers.21.mlp.router.layer",
+    "model.layers.20.mlp.router.layer"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a3180dde5b242a076dbaa1a3d3a4b22e8c70d6da569138f24e8f9b0fd942392b
- size 6303624
+ oid sha256:ded2926b6b32fd0682fb44888d3ebdc37da2b930dc27be85394d6db947b5153a
+ size 6961088
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1500b2d7c5faffbdf54bd0ff84aa5933c91b72fa6ea95d39011921c13619bc60
+ oid sha256:9cff8203b4dc3d3e94a95b8019ebd13fa2bda3b26e55f5c21bd9e277b8bee023
  size 5176