shubhrapandit committed on
Commit
fcd4bf9
·
1 Parent(s): d6431c3

Update files to match base model keeping quant_config intact

Browse files
added_tokens.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "</tool_call>": 151658,
3
- "<tool_call>": 151657,
4
- "<|box_end|>": 151649,
5
- "<|box_start|>": 151648,
6
- "<|endoftext|>": 151643,
7
- "<|file_sep|>": 151664,
8
- "<|fim_middle|>": 151660,
9
- "<|fim_pad|>": 151662,
10
- "<|fim_prefix|>": 151659,
11
- "<|fim_suffix|>": 151661,
12
- "<|im_end|>": 151645,
13
- "<|im_start|>": 151644,
14
- "<|image_pad|>": 151655,
15
- "<|image|>": 151665,
16
- "<|object_ref_end|>": 151647,
17
- "<|object_ref_start|>": 151646,
18
- "<|quad_end|>": 151651,
19
- "<|quad_start|>": 151650,
20
- "<|repo_name|>": 151663,
21
- "<|video_pad|>": 151656,
22
- "<|vision_end|>": 151653,
23
- "<|vision_pad|>": 151654,
24
- "<|vision_start|>": 151652
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chat_template.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|image|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
- }
 
1
  {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
config.json CHANGED
@@ -1,15 +1,17 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2.5-VL-3B-Instruct",
3
  "architectures": [
4
  "Qwen2_5_VLForConditionalGeneration"
5
  ],
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 151643,
8
  "eos_token_id": 151645,
 
 
 
 
 
9
  "hidden_act": "silu",
10
  "hidden_size": 2048,
11
- "image_token_id": 151665,
12
- "image_placeholder": "<|image|>",
13
  "initializer_range": 0.02,
14
  "intermediate_size": 11008,
15
  "max_position_embeddings": 128000,
@@ -18,6 +20,14 @@
18
  "num_attention_heads": 16,
19
  "num_hidden_layers": 36,
20
  "num_key_value_heads": 2,
 
 
 
 
 
 
 
 
21
  "quantization_config": {
22
  "config_groups": {
23
  "group_0": {
@@ -222,24 +232,6 @@
222
  "quant_method": "compressed-tensors",
223
  "quantization_status": "compressed"
224
  },
225
- "rms_norm_eps": 1e-06,
226
- "rope_scaling": {
227
- "mrope_section": [
228
- 16,
229
- 24,
230
- 24
231
- ],
232
- "rope_type": "default",
233
- "type": "default"
234
- },
235
- "rope_theta": 1000000.0,
236
- "sliding_window": 32768,
237
- "tie_word_embeddings": true,
238
- "torch_dtype": "bfloat16",
239
- "transformers_version": "4.49.0.dev0",
240
- "use_cache": true,
241
- "use_sliding_window": false,
242
- "video_token_id": 151656,
243
  "vision_config": {
244
  "depth": 32,
245
  "hidden_act": "silu",
@@ -260,9 +252,14 @@
260
  ],
261
  "tokens_per_second": 2,
262
  "temporal_patch_size": 2
263
- },
264
- "vision_end_token_id": 151653,
265
- "vision_start_token_id": 151652,
266
- "vision_token_id": 151654,
 
 
 
 
 
267
  "vocab_size": 151936
268
- }
 
1
  {
 
2
  "architectures": [
3
  "Qwen2_5_VLForConditionalGeneration"
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 151643,
7
  "eos_token_id": 151645,
8
+ "vision_start_token_id": 151652,
9
+ "vision_end_token_id": 151653,
10
+ "vision_token_id": 151654,
11
+ "image_token_id": 151655,
12
+ "video_token_id": 151656,
13
  "hidden_act": "silu",
14
  "hidden_size": 2048,
 
 
15
  "initializer_range": 0.02,
16
  "intermediate_size": 11008,
17
  "max_position_embeddings": 128000,
 
20
  "num_attention_heads": 16,
21
  "num_hidden_layers": 36,
22
  "num_key_value_heads": 2,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_theta": 1000000.0,
25
+ "sliding_window": 32768,
26
+ "tie_word_embeddings": true,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.41.2",
29
+ "use_cache": true,
30
+ "use_sliding_window": false,
31
  "quantization_config": {
32
  "config_groups": {
33
  "group_0": {
 
232
  "quant_method": "compressed-tensors",
233
  "quantization_status": "compressed"
234
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  "vision_config": {
236
  "depth": 32,
237
  "hidden_act": "silu",
 
252
  ],
253
  "tokens_per_second": 2,
254
  "temporal_patch_size": 2
255
+ },
256
+ "rope_scaling": {
257
+ "type": "mrope",
258
+ "mrope_section": [
259
+ 16,
260
+ 24,
261
+ 24
262
+ ]
263
+ },
264
  "vocab_size": 151936
265
+ }
generation_config.json CHANGED
@@ -1,14 +1,12 @@
1
  {
2
  "bos_token_id": 151643,
 
3
  "do_sample": true,
4
  "eos_token_id": [
5
  151645,
6
  151643
7
  ],
8
- "pad_token_id": 151643,
9
  "repetition_penalty": 1.05,
10
- "temperature": 0.1,
11
- "top_k": 1,
12
- "top_p": 0.001,
13
- "transformers_version": "4.49.0.dev0"
14
- }
 
1
  {
2
  "bos_token_id": 151643,
3
+ "pad_token_id": 151643,
4
  "do_sample": true,
5
  "eos_token_id": [
6
  151645,
7
  151643
8
  ],
 
9
  "repetition_penalty": 1.05,
10
+ "temperature": 0.000001,
11
+ "transformers_version": "4.49.0"
12
+ }
 
 
merges.txt CHANGED
@@ -1,4 +1,3 @@
1
- #version: 0.2
2
  Ġ Ġ
3
  ĠĠ ĠĠ
4
  i n
 
 
1
  Ġ Ġ
2
  ĠĠ ĠĠ
3
  i n
preprocessor_config.json CHANGED
@@ -16,4 +16,4 @@
16
  ],
17
  "image_processor_type": "Qwen2VLImageProcessor",
18
  "processor_class": "Qwen2_5_VLProcessor"
19
- }
 
16
  ],
17
  "image_processor_type": "Qwen2VLImageProcessor",
18
  "processor_class": "Qwen2_5_VLProcessor"
19
+ }
special_tokens_map.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>",
5
- "<|object_ref_start|>",
6
- "<|object_ref_end|>",
7
- "<|box_start|>",
8
- "<|box_end|>",
9
- "<|quad_start|>",
10
- "<|quad_end|>",
11
- "<|vision_start|>",
12
- "<|vision_end|>",
13
- "<|vision_pad|>",
14
- "<|image_pad|>",
15
- "<|video_pad|>",
16
- "<|image|>"
17
- ],
18
- "eos_token": {
19
- "content": "<|im_end|>",
20
- "lstrip": false,
21
- "normalized": false,
22
- "rstrip": false,
23
- "single_word": false
24
- },
25
- "pad_token": {
26
- "content": "<|endoftext|>",
27
- "lstrip": false,
28
- "normalized": false,
29
- "rstrip": false,
30
- "single_word": false
31
- }
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d439b26a73396e5655f4aab02cff9b00f59ad6102f9915263d074065762204c9
3
- size 11422181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0382117ea329cdf097041132f6d735924b697924d6f6fc3945713e96ce87539
3
+ size 7031645
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "add_bos_token": false,
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
  "151643": {
@@ -177,14 +176,6 @@
177
  "rstrip": false,
178
  "single_word": false,
179
  "special": false
180
- },
181
- "151665": {
182
- "content": "<|image|>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": true
188
  }
189
  },
190
  "additional_special_tokens": [
@@ -200,22 +191,17 @@
200
  "<|vision_end|>",
201
  "<|vision_pad|>",
202
  "<|image_pad|>",
203
- "<|video_pad|>",
204
- "<|image|>"
205
  ],
206
  "bos_token": null,
207
- "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|image|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
208
  "clean_up_tokenization_spaces": false,
209
  "eos_token": "<|im_end|>",
210
  "errors": "replace",
211
- "extra_special_tokens": {},
212
- "max_length": 2048,
213
  "model_max_length": 131072,
214
  "pad_token": "<|endoftext|>",
215
  "split_special_tokens": false,
216
- "stride": 0,
217
  "tokenizer_class": "Qwen2Tokenizer",
218
- "truncation_side": "right",
219
- "truncation_strategy": "longest_first",
220
- "unk_token": null
221
  }
 
1
  {
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "151643": {
 
176
  "rstrip": false,
177
  "single_word": false,
178
  "special": false
 
 
 
 
 
 
 
 
179
  }
180
  },
181
  "additional_special_tokens": [
 
191
  "<|vision_end|>",
192
  "<|vision_pad|>",
193
  "<|image_pad|>",
194
+ "<|video_pad|>"
 
195
  ],
196
  "bos_token": null,
197
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
198
  "clean_up_tokenization_spaces": false,
199
  "eos_token": "<|im_end|>",
200
  "errors": "replace",
 
 
201
  "model_max_length": 131072,
202
  "pad_token": "<|endoftext|>",
203
  "split_special_tokens": false,
 
204
  "tokenizer_class": "Qwen2Tokenizer",
205
+ "unk_token": null,
206
+ "add_bos_token": false
 
207
  }