mHossain committed on
Commit
102154c
·
1 Parent(s): efdccbf

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -13
  2. config.json +188 -1
  3. generation_config.json +4 -1
  4. pytorch_model.bin +1 -1
  5. training_args.bin +1 -1
README.md CHANGED
@@ -3,8 +3,6 @@ license: apache-2.0
3
  base_model: google/mt5-base
4
  tags:
5
  - generated_from_trainer
6
- metrics:
7
- - rouge
8
  model-index:
9
  - name: en_bn_summarize_v10
10
  results: []
@@ -16,13 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
16
  # en_bn_summarize_v10
17
 
18
  This model is a fine-tuned version of [google/mt5-base](https://huggingface.co/google/mt5-base) on an unknown dataset.
19
- It achieves the following results on the evaluation set:
20
- - Loss: 17.9980
21
- - Rouge1: 0.0
22
- - Rouge2: 0.0
23
- - Rougel: 0.0
24
- - Rougelsum: 0.0
25
- - Gen Len: 11.1118
26
 
27
  ## Model description
28
 
@@ -50,15 +41,13 @@ The following hyperparameters were used during training:
50
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
  - lr_scheduler_type: linear
52
  - lr_scheduler_warmup_steps: 5000
53
- - num_epochs: 3
54
 
55
  ### Training results
56
 
57
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
58
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
59
- | No log | 1.0 | 154 | 20.6865 | 0.0 | 0.0 | 0.0 | 0.0 | 7.2795 |
60
- | 22.549 | 2.0 | 308 | 20.0045 | 0.0 | 0.0 | 0.0 | 0.0 | 9.4348 |
61
- | 22.549 | 3.0 | 462 | 17.9980 | 0.0 | 0.0 | 0.0 | 0.0 | 11.1118 |
62
 
63
 
64
  ### Framework versions
 
3
  base_model: google/mt5-base
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: en_bn_summarize_v10
8
  results: []
 
14
  # en_bn_summarize_v10
15
 
16
  This model is a fine-tuned version of [google/mt5-base](https://huggingface.co/google/mt5-base) on an unknown dataset.
 
 
 
 
 
 
 
17
 
18
  ## Model description
19
 
 
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
  - lr_scheduler_warmup_steps: 5000
44
+ - num_epochs: 1
45
 
46
  ### Training results
47
 
48
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
49
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
50
+ | No log | 1.0 | 154 | 21.8463 | 0.0 | 0.0 | 0.0 | 0.0 | 51.5901 |
 
 
51
 
52
 
53
  ### Framework versions
config.json CHANGED
@@ -7,7 +7,7 @@
7
  "d_ff": 2048,
8
  "d_kv": 64,
9
  "d_model": 768,
10
- "decoder_start_token_id": 2,
11
  "dense_act_fn": "gelu_new",
12
  "dropout_rate": 0.1,
13
  "eos_token_id": 1,
@@ -16,7 +16,10 @@
16
  "is_encoder_decoder": true,
17
  "is_gated_act": true,
18
  "layer_norm_epsilon": 1e-06,
 
 
19
  "model_type": "mt5",
 
20
  "num_decoder_layers": 12,
21
  "num_heads": 12,
22
  "num_layers": 12,
@@ -24,6 +27,190 @@
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
26
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "tie_word_embeddings": false,
28
  "tokenizer_class": "T5Tokenizer",
29
  "torch_dtype": "float32",
 
7
  "d_ff": 2048,
8
  "d_kv": 64,
9
  "d_model": 768,
10
+ "decoder_start_token_id": 250042,
11
  "dense_act_fn": "gelu_new",
12
  "dropout_rate": 0.1,
13
  "eos_token_id": 1,
 
16
  "is_encoder_decoder": true,
17
  "is_gated_act": true,
18
  "layer_norm_epsilon": 1e-06,
19
+ "length_penalty": 0.6,
20
+ "max_length": 84,
21
  "model_type": "mt5",
22
+ "num_beams": 4,
23
  "num_decoder_layers": 12,
24
  "num_heads": 12,
25
  "num_layers": 12,
 
27
  "pad_token_id": 0,
28
  "relative_attention_max_distance": 128,
29
  "relative_attention_num_buckets": 32,
30
+ "task_specific_params": {
31
+ "langid_map": {
32
+ "amharic": [
33
+ 35,
34
+ "\u2581<extra_id_64>"
35
+ ],
36
+ "arabic": [
37
+ 4,
38
+ "\u2581<extra_id_95>"
39
+ ],
40
+ "azerbaijani": [
41
+ 7,
42
+ "\u2581<extra_id_92>"
43
+ ],
44
+ "bengali": [
45
+ 42,
46
+ "\u2581<extra_id_57>"
47
+ ],
48
+ "burmese": [
49
+ 33,
50
+ "\u2581<extra_id_66>"
51
+ ],
52
+ "chinese_simplified": [
53
+ 40,
54
+ "\u2581<extra_id_59>"
55
+ ],
56
+ "chinese_traditional": [
57
+ 44,
58
+ "\u2581<extra_id_55>"
59
+ ],
60
+ "english": [
61
+ 30,
62
+ "\u2581<extra_id_69>"
63
+ ],
64
+ "french": [
65
+ 10,
66
+ "\u2581<extra_id_89>"
67
+ ],
68
+ "gujarati": [
69
+ 27,
70
+ "\u2581<extra_id_72>"
71
+ ],
72
+ "hausa": [
73
+ 43,
74
+ "\u2581<extra_id_56>"
75
+ ],
76
+ "hindi": [
77
+ 21,
78
+ "\u2581<extra_id_78>"
79
+ ],
80
+ "igbo": [
81
+ 9,
82
+ "\u2581<extra_id_90>"
83
+ ],
84
+ "indonesian": [
85
+ 1,
86
+ "\u2581<extra_id_98>"
87
+ ],
88
+ "japanese": [
89
+ 37,
90
+ "\u2581<extra_id_62>"
91
+ ],
92
+ "kirundi": [
93
+ 0,
94
+ "\u2581<extra_id_99>"
95
+ ],
96
+ "korean": [
97
+ 29,
98
+ "\u2581<extra_id_70>"
99
+ ],
100
+ "kyrgyz": [
101
+ 5,
102
+ "\u2581<extra_id_94>"
103
+ ],
104
+ "marathi": [
105
+ 13,
106
+ "\u2581<extra_id_86>"
107
+ ],
108
+ "nepali": [
109
+ 20,
110
+ "\u2581<extra_id_79>"
111
+ ],
112
+ "oromo": [
113
+ 41,
114
+ "\u2581<extra_id_58>"
115
+ ],
116
+ "pashto": [
117
+ 34,
118
+ "\u2581<extra_id_65>"
119
+ ],
120
+ "persian": [
121
+ 23,
122
+ "\u2581<extra_id_76>"
123
+ ],
124
+ "pidgin": [
125
+ 14,
126
+ "\u2581<extra_id_85>"
127
+ ],
128
+ "portuguese": [
129
+ 39,
130
+ "\u2581<extra_id_60>"
131
+ ],
132
+ "punjabi": [
133
+ 17,
134
+ "\u2581<extra_id_82>"
135
+ ],
136
+ "russian": [
137
+ 36,
138
+ "\u2581<extra_id_63>"
139
+ ],
140
+ "scottish_gaelic": [
141
+ 24,
142
+ "\u2581<extra_id_75>"
143
+ ],
144
+ "serbian_cyrillic": [
145
+ 28,
146
+ "\u2581<extra_id_71>"
147
+ ],
148
+ "serbian_latin": [
149
+ 11,
150
+ "\u2581<extra_id_88>"
151
+ ],
152
+ "sinhala": [
153
+ 31,
154
+ "\u2581<extra_id_68>"
155
+ ],
156
+ "somali": [
157
+ 19,
158
+ "\u2581<extra_id_80>"
159
+ ],
160
+ "spanish": [
161
+ 3,
162
+ "\u2581<extra_id_96>"
163
+ ],
164
+ "swahili": [
165
+ 18,
166
+ "\u2581<extra_id_81>"
167
+ ],
168
+ "tamil": [
169
+ 32,
170
+ "\u2581<extra_id_67>"
171
+ ],
172
+ "telugu": [
173
+ 22,
174
+ "\u2581<extra_id_77>"
175
+ ],
176
+ "thai": [
177
+ 6,
178
+ "\u2581<extra_id_93>"
179
+ ],
180
+ "tigrinya": [
181
+ 16,
182
+ "\u2581<extra_id_83>"
183
+ ],
184
+ "turkish": [
185
+ 15,
186
+ "\u2581<extra_id_84>"
187
+ ],
188
+ "ukrainian": [
189
+ 2,
190
+ "\u2581<extra_id_97>"
191
+ ],
192
+ "urdu": [
193
+ 38,
194
+ "\u2581<extra_id_61>"
195
+ ],
196
+ "uzbek": [
197
+ 8,
198
+ "\u2581<extra_id_91>"
199
+ ],
200
+ "vietnamese": [
201
+ 12,
202
+ "\u2581<extra_id_87>"
203
+ ],
204
+ "welsh": [
205
+ 26,
206
+ "\u2581<extra_id_73>"
207
+ ],
208
+ "yoruba": [
209
+ 25,
210
+ "\u2581<extra_id_74>"
211
+ ]
212
+ }
213
+ },
214
  "tie_word_embeddings": false,
215
  "tokenizer_class": "T5Tokenizer",
216
  "torch_dtype": "float32",
generation_config.json CHANGED
@@ -1,6 +1,9 @@
1
  {
2
- "decoder_start_token_id": 2,
3
  "eos_token_id": 1,
 
 
 
4
  "pad_token_id": 0,
5
  "transformers_version": "4.34.0"
6
  }
 
1
  {
2
+ "decoder_start_token_id": 250042,
3
  "eos_token_id": 1,
4
+ "length_penalty": 0.6,
5
+ "max_length": 84,
6
+ "num_beams": 4,
7
  "pad_token_id": 0,
8
  "transformers_version": "4.34.0"
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c713fdab7f99535b13dec5957df269574af550cbc5685a932689907e195c06b
3
  size 2329702581
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463a50ad15c9c0c2c8a7f6950790c52ec47e2c1c6b33920a4fcdaf5b8d4dab2f
3
  size 2329702581
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d41a327d7dbb68bdd916d3a0282dc0f54083139750b15ba1521a53ef017df7b
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fee17a8e368fd46ee983224f7e1aed285706fd6ffacdd66c7a482f8f23b957a8
3
  size 4219