Upload folder using huggingface_hub
Browse files- .gitattributes +4 -0
- 0223_0350_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_8ff049/results.json +112 -0
- 0223_0350_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_8ff049/vsibench.json +3 -0
- 0223_0351_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_1ad6e5/results.json +112 -0
- 0223_0351_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_1ad6e5/vsibench.json +3 -0
- 0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2229cf/results.json +112 -0
- 0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2229cf/vsibench.json +3 -0
- 0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_3f39f5/results.json +112 -0
- 0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_3f39f5/vsibench.json +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
0223_0350_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_8ff049/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
37 |
+
0223_0351_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_1ad6e5/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
38 |
+
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2229cf/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
39 |
+
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_3f39f5/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
0223_0350_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_8ff049/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 48.56221772781847,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002f1dc4af0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002f1dc51b0>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002f1dc5750>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002f1dc65f0>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002f1dc6f80>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-base_lora_vsibench_02_23,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "c8653c8",
|
111 |
+
"date": "0223_0404"
|
112 |
+
}
|
0223_0350_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_8ff049/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fff43618ca040c87d16704ddb173218533ccd63da8849b000b6a7d16e68ca17
|
3 |
+
size 13437307
|
0223_0351_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_1ad6e5/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 45.23785592237283,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002e1e830a0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e1e83760>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002e0b38040>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002e0b38c10>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002e0b39510>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-cut3r_only_stage2_lora_vsibench_02_23,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "c8653c8",
|
111 |
+
"date": "0223_0407"
|
112 |
+
}
|
0223_0351_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_1ad6e5/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7783eef03dd4466f0fa5022d7ab9164fb06c9a81f778441a1559aa2878346700
|
3 |
+
size 13456732
|
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2229cf/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 34.41087854228516,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002beb631c0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002beb63880>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002bd50c160>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002bd50cd30>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002bd50d630>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-cut3r_stage2_lr1e-4_cat_02_23,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "c8653c8",
|
111 |
+
"date": "0223_2111"
|
112 |
+
}
|
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2229cf/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19dba4e52f22f327eb22ed1a3af41745fb701550c83d2676b77c9d0ea37a9990
|
3 |
+
size 13364198
|
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_3f39f5/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 54.84086609704796,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002b657caf0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002b657d1b0>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002b657dcf0>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002b657e830>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002b657eb00>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-base_stage2_lr1e-4_cat_02_23,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "c8653c8",
|
111 |
+
"date": "0223_2108"
|
112 |
+
}
|
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_3f39f5/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b916cc64a08eb74a4dd71ccb925632e0e2f4e21e1110e455074a2beb6b1f8490
|
3 |
+
size 13363805
|