Initial commit of LLaVA model and processor
Browse files
- config.json +1 -1
- generation_config.json +1 -1
- model-00001-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- preprocessor_config.json +67 -21
- processor_config.json +3 -2
- special_tokens_map.json +1 -0
- tokenizer_config.json +4 -1
config.json
CHANGED
@@ -44,7 +44,7 @@
|
|
44 |
},
|
45 |
"tie_word_embeddings": false,
|
46 |
"torch_dtype": "float32",
|
47 |
-
"transformers_version": "4.
|
48 |
"vision_config": {
|
49 |
"hidden_act": "gelu_pytorch_tanh",
|
50 |
"hidden_size": 1152,
|
|
|
44 |
},
|
45 |
"tie_word_embeddings": false,
|
46 |
"torch_dtype": "float32",
|
47 |
+
"transformers_version": "4.47.1",
|
48 |
"vision_config": {
|
49 |
"hidden_act": "gelu_pytorch_tanh",
|
50 |
"hidden_size": 1152,
|
generation_config.json
CHANGED
@@ -3,5 +3,5 @@
|
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 2,
|
5 |
"pad_token_id": 32001,
|
6 |
-
"transformers_version": "4.
|
7 |
}
|
|
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 2,
|
5 |
"pad_token_id": 32001,
|
6 |
+
"transformers_version": "4.47.1"
|
7 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4982343568
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:994bb4543a2c76f44db279303e5bd4917c6c74028896a32cb43ebfe916f924f3
|
3 |
size 4982343568
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4490727736
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:825fd0df98f839c16025190216e041b2f7cc13ff5e2bd08c53a1858f747516fa
|
3 |
size 4490727736
|
preprocessor_config.json
CHANGED
@@ -1,51 +1,97 @@
|
|
1 |
{
|
2 |
"crop_size": {
|
3 |
-
"height":
|
4 |
-
"width":
|
5 |
},
|
6 |
"do_center_crop": true,
|
7 |
-
"do_convert_rgb":
|
8 |
"do_normalize": true,
|
9 |
"do_pad": true,
|
10 |
"do_rescale": true,
|
11 |
-
"do_resize":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
"image_grid_pinpoints": [
|
13 |
[
|
14 |
-
|
15 |
-
|
16 |
],
|
17 |
[
|
18 |
-
|
19 |
-
|
20 |
],
|
21 |
[
|
22 |
-
|
23 |
-
|
24 |
],
|
25 |
[
|
26 |
-
|
27 |
-
|
28 |
],
|
29 |
[
|
30 |
-
|
31 |
-
|
32 |
]
|
33 |
],
|
34 |
"image_mean": [
|
35 |
-
0.
|
36 |
-
0.
|
37 |
-
0.
|
38 |
],
|
39 |
"image_processor_type": "LlavaNextImageProcessor",
|
40 |
"image_std": [
|
41 |
-
0.
|
42 |
-
0.
|
43 |
-
0.
|
44 |
],
|
45 |
"processor_class": "LlavaNextProcessor",
|
46 |
"resample": 3,
|
47 |
"rescale_factor": 0.00392156862745098,
|
48 |
"size": {
|
49 |
-
"shortest_edge":
|
50 |
}
|
51 |
}
|
|
|
1 |
{
|
2 |
"crop_size": {
|
3 |
+
"height": 224,
|
4 |
+
"width": 224
|
5 |
},
|
6 |
"do_center_crop": true,
|
7 |
+
"do_convert_rgb": true,
|
8 |
"do_normalize": true,
|
9 |
"do_pad": true,
|
10 |
"do_rescale": true,
|
11 |
+
"do_resize": {
|
12 |
+
"crop_size": {
|
13 |
+
"height": 384,
|
14 |
+
"width": 384
|
15 |
+
},
|
16 |
+
"do_center_crop": true,
|
17 |
+
"do_convert_rgb": false,
|
18 |
+
"do_normalize": true,
|
19 |
+
"do_pad": true,
|
20 |
+
"do_rescale": true,
|
21 |
+
"do_resize": true,
|
22 |
+
"image_grid_pinpoints": [
|
23 |
+
[
|
24 |
+
768,
|
25 |
+
384
|
26 |
+
],
|
27 |
+
[
|
28 |
+
384,
|
29 |
+
768
|
30 |
+
],
|
31 |
+
[
|
32 |
+
768,
|
33 |
+
768
|
34 |
+
],
|
35 |
+
[
|
36 |
+
384,
|
37 |
+
1152
|
38 |
+
],
|
39 |
+
[
|
40 |
+
1152,
|
41 |
+
384
|
42 |
+
]
|
43 |
+
],
|
44 |
+
"image_mean": [
|
45 |
+
0.5,
|
46 |
+
0.5,
|
47 |
+
0.5
|
48 |
+
],
|
49 |
+
"image_std": [
|
50 |
+
0.5,
|
51 |
+
0.5,
|
52 |
+
0.5
|
53 |
+
],
|
54 |
+
"size": {
|
55 |
+
"shortest_edge": 384
|
56 |
+
}
|
57 |
+
},
|
58 |
"image_grid_pinpoints": [
|
59 |
[
|
60 |
+
336,
|
61 |
+
672
|
62 |
],
|
63 |
[
|
64 |
+
672,
|
65 |
+
336
|
66 |
],
|
67 |
[
|
68 |
+
672,
|
69 |
+
672
|
70 |
],
|
71 |
[
|
72 |
+
1008,
|
73 |
+
336
|
74 |
],
|
75 |
[
|
76 |
+
336,
|
77 |
+
1008
|
78 |
]
|
79 |
],
|
80 |
"image_mean": [
|
81 |
+
0.48145466,
|
82 |
+
0.4578275,
|
83 |
+
0.40821073
|
84 |
],
|
85 |
"image_processor_type": "LlavaNextImageProcessor",
|
86 |
"image_std": [
|
87 |
+
0.26862954,
|
88 |
+
0.26130258,
|
89 |
+
0.27577711
|
90 |
],
|
91 |
"processor_class": "LlavaNextProcessor",
|
92 |
"resample": 3,
|
93 |
"rescale_factor": 0.00392156862745098,
|
94 |
"size": {
|
95 |
+
"shortest_edge": 224
|
96 |
}
|
97 |
}
|
processor_config.json
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
{
|
2 |
"image_token": "<image>",
|
3 |
-
"
|
|
|
4 |
"processor_class": "LlavaNextProcessor",
|
5 |
-
"vision_feature_select_strategy":
|
6 |
}
|
|
|
1 |
{
|
2 |
"image_token": "<image>",
|
3 |
+
"num_additional_image_tokens": 0,
|
4 |
+
"patch_size": null,
|
5 |
"processor_class": "LlavaNextProcessor",
|
6 |
+
"vision_feature_select_strategy": null
|
7 |
}
|
special_tokens_map.json
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
|
|
16 |
"pad_token": {
|
17 |
"content": "<unk>",
|
18 |
"lstrip": false,
|
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
16 |
+
"image_token": "<image>",
|
17 |
"pad_token": {
|
18 |
"content": "<unk>",
|
19 |
"lstrip": false,
|
tokenizer_config.json
CHANGED
@@ -37,9 +37,12 @@
|
|
37 |
}
|
38 |
},
|
39 |
"bos_token": "<s>",
|
40 |
-
"chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}",
|
41 |
"clean_up_tokenization_spaces": false,
|
42 |
"eos_token": "</s>",
|
|
|
|
|
|
|
|
|
43 |
"legacy": false,
|
44 |
"model_max_length": 4096,
|
45 |
"pad_token": "<unk>",
|
|
|
37 |
}
|
38 |
},
|
39 |
"bos_token": "<s>",
|
|
|
40 |
"clean_up_tokenization_spaces": false,
|
41 |
"eos_token": "</s>",
|
42 |
+
"extra_special_tokens": {
|
43 |
+
"image_token": "<image>"
|
44 |
+
},
|
45 |
+
"image_token": "<image>",
|
46 |
"legacy": false,
|
47 |
"model_max_length": 4096,
|
48 |
"pad_token": "<unk>",
|