giobin committed on
Commit
55a1b86
·
1 Parent(s): 89b5d0e

Initial commit of LLaVA model and processor

Browse files
config.json CHANGED
@@ -44,7 +44,7 @@
44
  },
45
  "tie_word_embeddings": false,
46
  "torch_dtype": "float32",
47
- "transformers_version": "4.45.2",
48
  "vision_config": {
49
  "hidden_act": "gelu_pytorch_tanh",
50
  "hidden_size": 1152,
 
44
  },
45
  "tie_word_embeddings": false,
46
  "torch_dtype": "float32",
47
+ "transformers_version": "4.47.1",
48
  "vision_config": {
49
  "hidden_act": "gelu_pytorch_tanh",
50
  "hidden_size": 1152,
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 32001,
6
- "transformers_version": "4.45.2"
7
  }
 
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 32001,
6
+ "transformers_version": "4.47.1"
7
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0e8517d6c3f6979745cd3927db7597b112a78200d366cd84fc365cc0dc07c6f
3
  size 4982343568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994bb4543a2c76f44db279303e5bd4917c6c74028896a32cb43ebfe916f924f3
3
  size 4982343568
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad0982732bed63d02a71e528973081fd739d22fb92a4dccab1f8b3d03d8099a
3
  size 4490727736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:825fd0df98f839c16025190216e041b2f7cc13ff5e2bd08c53a1858f747516fa
3
  size 4490727736
preprocessor_config.json CHANGED
@@ -1,51 +1,97 @@
1
  {
2
  "crop_size": {
3
- "height": 384,
4
- "width": 384
5
  },
6
  "do_center_crop": true,
7
- "do_convert_rgb": false,
8
  "do_normalize": true,
9
  "do_pad": true,
10
  "do_rescale": true,
11
- "do_resize": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "image_grid_pinpoints": [
13
  [
14
- 768,
15
- 384
16
  ],
17
  [
18
- 384,
19
- 768
20
  ],
21
  [
22
- 768,
23
- 768
24
  ],
25
  [
26
- 384,
27
- 1152
28
  ],
29
  [
30
- 1152,
31
- 384
32
  ]
33
  ],
34
  "image_mean": [
35
- 0.5,
36
- 0.5,
37
- 0.5
38
  ],
39
  "image_processor_type": "LlavaNextImageProcessor",
40
  "image_std": [
41
- 0.5,
42
- 0.5,
43
- 0.5
44
  ],
45
  "processor_class": "LlavaNextProcessor",
46
  "resample": 3,
47
  "rescale_factor": 0.00392156862745098,
48
  "size": {
49
- "shortest_edge": 384
50
  }
51
  }
 
1
  {
2
  "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
  },
6
  "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
  "do_normalize": true,
9
  "do_pad": true,
10
  "do_rescale": true,
11
+ "do_resize": {
12
+ "crop_size": {
13
+ "height": 384,
14
+ "width": 384
15
+ },
16
+ "do_center_crop": true,
17
+ "do_convert_rgb": false,
18
+ "do_normalize": true,
19
+ "do_pad": true,
20
+ "do_rescale": true,
21
+ "do_resize": true,
22
+ "image_grid_pinpoints": [
23
+ [
24
+ 768,
25
+ 384
26
+ ],
27
+ [
28
+ 384,
29
+ 768
30
+ ],
31
+ [
32
+ 768,
33
+ 768
34
+ ],
35
+ [
36
+ 384,
37
+ 1152
38
+ ],
39
+ [
40
+ 1152,
41
+ 384
42
+ ]
43
+ ],
44
+ "image_mean": [
45
+ 0.5,
46
+ 0.5,
47
+ 0.5
48
+ ],
49
+ "image_std": [
50
+ 0.5,
51
+ 0.5,
52
+ 0.5
53
+ ],
54
+ "size": {
55
+ "shortest_edge": 384
56
+ }
57
+ },
58
  "image_grid_pinpoints": [
59
  [
60
+ 336,
61
+ 672
62
  ],
63
  [
64
+ 672,
65
+ 336
66
  ],
67
  [
68
+ 672,
69
+ 672
70
  ],
71
  [
72
+ 1008,
73
+ 336
74
  ],
75
  [
76
+ 336,
77
+ 1008
78
  ]
79
  ],
80
  "image_mean": [
81
+ 0.48145466,
82
+ 0.4578275,
83
+ 0.40821073
84
  ],
85
  "image_processor_type": "LlavaNextImageProcessor",
86
  "image_std": [
87
+ 0.26862954,
88
+ 0.26130258,
89
+ 0.27577711
90
  ],
91
  "processor_class": "LlavaNextProcessor",
92
  "resample": 3,
93
  "rescale_factor": 0.00392156862745098,
94
  "size": {
95
+ "shortest_edge": 224
96
  }
97
  }
processor_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "image_token": "<image>",
3
- "patch_size": 14,
 
4
  "processor_class": "LlavaNextProcessor",
5
- "vision_feature_select_strategy": "default"
6
  }
 
1
  {
2
  "image_token": "<image>",
3
+ "num_additional_image_tokens": 0,
4
+ "patch_size": null,
5
  "processor_class": "LlavaNextProcessor",
6
+ "vision_feature_select_strategy": null
7
  }
special_tokens_map.json CHANGED
@@ -13,6 +13,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
16
  "pad_token": {
17
  "content": "<unk>",
18
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "image_token": "<image>",
17
  "pad_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -37,9 +37,12 @@
37
  }
38
  },
39
  "bos_token": "<s>",
40
- "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}",
41
  "clean_up_tokenization_spaces": false,
42
  "eos_token": "</s>",
 
 
 
 
43
  "legacy": false,
44
  "model_max_length": 4096,
45
  "pad_token": "<unk>",
 
37
  }
38
  },
39
  "bos_token": "<s>",
 
40
  "clean_up_tokenization_spaces": false,
41
  "eos_token": "</s>",
42
+ "extra_special_tokens": {
43
+ "image_token": "<image>"
44
+ },
45
+ "image_token": "<image>",
46
  "legacy": false,
47
  "model_max_length": 4096,
48
  "pad_token": "<unk>",