madhan2211 committed
Commit 3acfa17 · verified · 1 Parent(s): 1054a9f

Update config.json

Files changed (1):
  1. config.json +375 -375
config.json CHANGED
@@ -1,375 +1,375 @@
Although the diff spans the whole file, the only substantive change is in "vision_config": "model_type" is updated from the empty string to "davit".

-    "model_type": "",
+    "model_type": "davit",

The resulting config.json in full:
{
  "_name_or_path": "microsoft/Florence-2-base-ft",
  "architectures": [
    "Florence2ForConditionalGeneration"
  ],
  "auto_map": {
    "AutoConfig": "microsoft/Florence-2-base-ft--configuration_florence2.Florence2Config",
    "AutoModelForCausalLM": "microsoft/Florence-2-base-ft--modeling_florence2.Florence2ForConditionalGeneration"
  },
  "bos_token_id": 2,
  "eos_token_id": 1,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_13",
    "14": "LABEL_14",
    "15": "LABEL_15",
    "16": "LABEL_16",
    "17": "LABEL_17",
    "18": "LABEL_18",
    "19": "LABEL_19",
    "20": "LABEL_20",
    "21": "LABEL_21",
    "22": "LABEL_22",
    "23": "LABEL_23",
    "24": "LABEL_24",
    "25": "LABEL_25",
    "26": "LABEL_26",
    "27": "LABEL_27",
    "28": "LABEL_28",
    "29": "LABEL_29",
    "30": "LABEL_30",
    "31": "LABEL_31",
    "32": "LABEL_32",
    "33": "LABEL_33",
    "34": "LABEL_34",
    "35": "LABEL_35",
    "36": "LABEL_36",
    "37": "LABEL_37",
    "38": "LABEL_38",
    "39": "LABEL_39",
    "40": "LABEL_40",
    "41": "LABEL_41",
    "42": "LABEL_42",
    "43": "LABEL_43",
    "44": "LABEL_44",
    "45": "LABEL_45",
    "46": "LABEL_46",
    "47": "LABEL_47",
    "48": "LABEL_48",
    "49": "LABEL_49",
    "50": "LABEL_50",
    "51": "LABEL_51",
    "52": "LABEL_52",
    "53": "LABEL_53",
    "54": "LABEL_54",
    "55": "LABEL_55",
    "56": "LABEL_56",
    "57": "LABEL_57",
    "58": "LABEL_58",
    "59": "LABEL_59",
    "60": "LABEL_60",
    "61": "LABEL_61",
    "62": "LABEL_62",
    "63": "LABEL_63",
    "64": "LABEL_64",
    "65": "LABEL_65",
    "66": "LABEL_66"
  },
  "ignore_index": -100,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_11": 11,
    "LABEL_12": 12,
    "LABEL_13": 13,
    "LABEL_14": 14,
    "LABEL_15": 15,
    "LABEL_16": 16,
    "LABEL_17": 17,
    "LABEL_18": 18,
    "LABEL_19": 19,
    "LABEL_2": 2,
    "LABEL_20": 20,
    "LABEL_21": 21,
    "LABEL_22": 22,
    "LABEL_23": 23,
    "LABEL_24": 24,
    "LABEL_25": 25,
    "LABEL_26": 26,
    "LABEL_27": 27,
    "LABEL_28": 28,
    "LABEL_29": 29,
    "LABEL_3": 3,
    "LABEL_30": 30,
    "LABEL_31": 31,
    "LABEL_32": 32,
    "LABEL_33": 33,
    "LABEL_34": 34,
    "LABEL_35": 35,
    "LABEL_36": 36,
    "LABEL_37": 37,
    "LABEL_38": 38,
    "LABEL_39": 39,
    "LABEL_4": 4,
    "LABEL_40": 40,
    "LABEL_41": 41,
    "LABEL_42": 42,
    "LABEL_43": 43,
    "LABEL_44": 44,
    "LABEL_45": 45,
    "LABEL_46": 46,
    "LABEL_47": 47,
    "LABEL_48": 48,
    "LABEL_49": 49,
    "LABEL_5": 5,
    "LABEL_50": 50,
    "LABEL_51": 51,
    "LABEL_52": 52,
    "LABEL_53": 53,
    "LABEL_54": 54,
    "LABEL_55": 55,
    "LABEL_56": 56,
    "LABEL_57": 57,
    "LABEL_58": 58,
    "LABEL_59": 59,
    "LABEL_6": 6,
    "LABEL_60": 60,
    "LABEL_61": 61,
    "LABEL_62": 62,
    "LABEL_63": 63,
    "LABEL_64": 64,
    "LABEL_65": 65,
    "LABEL_66": 66,
    "LABEL_7": 7,
    "LABEL_8": 8,
    "LABEL_9": 9
  },
  "model_type": "florence2",
  "pad_token_id": 0,
  "projection_dim": 768,
  "text_config": {
    "_name_or_path": "",
    "activation_dropout": 0.1,
    "activation_function": "gelu",
    "add_bias_logits": false,
    "add_cross_attention": false,
    "add_final_layer_norm": false,
    "architectures": null,
    "attention_dropout": 0.1,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": 0,
    "chunk_size_feed_forward": 0,
    "classif_dropout": 0.1,
    "classifier_dropout": 0.0,
    "cross_attention_hidden_size": null,
    "d_model": 768,
    "decoder_attention_heads": 12,
    "decoder_ffn_dim": 3072,
    "decoder_layerdrop": 0.0,
    "decoder_layers": 6,
    "decoder_start_token_id": 2,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.1,
    "early_stopping": true,
    "encoder_attention_heads": 12,
    "encoder_ffn_dim": 3072,
    "encoder_layerdrop": 0.0,
    "encoder_layers": 6,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 2,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": 0,
    "forced_eos_token_id": 2,
    "gradient_checkpointing": false,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1",
      "2": "LABEL_2"
    },
    "init_std": 0.02,
    "is_decoder": false,
    "is_encoder_decoder": true,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1,
      "LABEL_2": 2
    },
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 1024,
    "min_length": 0,
    "model_type": "florence2_language",
    "no_repeat_ngram_size": 3,
    "normalize_before": false,
    "num_beam_groups": 1,
    "num_beams": 3,
    "num_hidden_layers": 6,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 1,
    "prefix": null,
    "problem_type": null,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "scale_embedding": false,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "typical_p": 1.0,
    "use_bfloat16": false,
    "use_cache": true,
    "vocab_size": 51289
  },
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "vision_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "depths": [
      1,
      1,
      9,
      1
    ],
    "dim_embed": [
      128,
      256,
      512,
      1024
    ],
    "diversity_penalty": 0.0,
    "do_sample": false,
    "drop_path_rate": 0.1,
    "early_stopping": false,
    "enable_checkpoint": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "image_feature_source": [
      "spatial_avg_pool",
      "temporal_avg_pool"
    ],
    "image_pos_embed": {
      "max_pos_embeddings": 50,
      "type": "learned_abs_2d"
    },
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "length_penalty": 1.0,
    "max_length": 20,
    "min_length": 0,
    "model_type": "davit",
    "no_repeat_ngram_size": 0,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_groups": [
      4,
      8,
      16,
      32
    ],
    "num_heads": [
      4,
      8,
      16,
      32
    ],
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "patch_padding": [
      3,
      1,
      1,
      1
    ],
    "patch_prenorm": [
      false,
      true,
      true,
      true
    ],
    "patch_size": [
      7,
      3,
      3,
      3
    ],
    "patch_stride": [
      4,
      2,
      2,
      2
    ],
    "prefix": null,
    "problem_type": null,
    "projection_dim": 768,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "typical_p": 1.0,
    "use_bfloat16": false,
    "visual_temporal_embedding": {
      "max_temporal_embeddings": 100,
      "type": "COSINE"
    },
    "window_size": 12
  },
  "vocab_size": 51289
}
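For reference, a minimal sketch of how a config like this is typically consumed. The repo id below is a hypothetical placeholder (substitute the repository this commit belongs to); the loading pattern follows the auto_map entries, which point at the custom Florence2Config / Florence2ForConditionalGeneration classes and therefore require trust_remote_code=True. The captioning call mirrors the usage documented for the upstream microsoft/Florence-2-base-ft model, so treat it as an assumption rather than something specified by this commit.

from PIL import Image
from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor

# Hypothetical repo id; replace with the repository that holds this config.json.
repo_id = "your-username/your-florence2-finetune"

# auto_map resolves to custom Florence2 classes, so remote code must be trusted.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.model_type)      # "florence2"
print(config.vision_config)   # DaViT vision-tower settings ("model_type": "davit" after this commit)

model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)

# Simple captioning call; the blank image is only a stand-in so the snippet is self-contained.
image = Image.new("RGB", (768, 768), color="white")
inputs = processor(text="<CAPTION>", images=image, return_tensors="pt")

generated_ids = model.generate(
    input_ids=inputs["input_ids"],
    pixel_values=inputs["pixel_values"],
    max_new_tokens=128,
    num_beams=3,  # matches the beam-search default in text_config
)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])

Note that the generation defaults in text_config (num_beams 3, no_repeat_ngram_size 3, early_stopping true) drive beam search when no overrides are passed to generate().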