Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- 20250227/vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/results.json +112 -0
- 20250227/vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json +3 -0
- 20250227/vsibench/0227_2119_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ffbe0/results.json +112 -0
- 20250227/vsibench/0227_2119_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ffbe0/vsibench.json +0 -0
- 20250227/vsibench/0227_2125_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_13af5f/results.json +112 -0
- 20250227/vsibench/0227_2125_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_13af5f/vsibench.json +0 -0
- 20250227/vsibench/0227_2133_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ca234/results.json +112 -0
- 20250227/vsibench/0227_2133_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ca234/vsibench.json +0 -0
- 20250227/vsibench/0227_2134_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_0a55b4/results.json +112 -0
- 20250227/vsibench/0227_2134_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_0a55b4/vsibench.json +0 -0
- 20250227/vsibench/0227_2139_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c76152/results.json +112 -0
- 20250227/vsibench/0227_2139_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c76152/vsibench.json +0 -0
- 20250227/vsibench/0227_2143_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_62e9eb/results.json +112 -0
- 20250227/vsibench/0227_2143_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_62e9eb/vsibench.json +0 -0
- 20250227/vsibench/0227_2147_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_eb18c4/results.json +112 -0
- 20250227/vsibench/0227_2147_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_eb18c4/vsibench.json +0 -0
.gitattributes
CHANGED
@@ -38,3 +38,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
38 |
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2229cf/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
39 |
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_3f39f5/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
40 |
vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
38 |
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2229cf/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
39 |
0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_3f39f5/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
40 |
vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
41 |
+
20250227/vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
20250227/vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 33.12872055166613,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002c6720940>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c6721000>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002c67217e0>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002c6722290>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002c67227a0>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_base_lora_ep3,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "e374ddc",
|
111 |
+
"date": "0227_2123"
|
112 |
+
}
|
20250227/vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f25180c5ef080b415e72591ed1418a06ebc911b4752f37d839834f515cc706bd
|
3 |
+
size 13369488
|
20250227/vsibench/0227_2119_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ffbe0/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 35.480714020333025,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002e6f6b0a0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e6f6b760>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002e6f6bd90>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002e5c14a60>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002e5c152d0>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 1544,
|
92 |
+
"effective": 1544
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_ep3,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "e374ddc",
|
111 |
+
"date": "0227_2135"
|
112 |
+
}
|
20250227/vsibench/0227_2119_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ffbe0/vsibench.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
20250227/vsibench/0227_2125_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_13af5f/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 35.378964434086534,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002e42670a0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e4267760>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002e4267d90>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002e4520a60>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002e45212d0>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 1544,
|
92 |
+
"effective": 1544
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_diff_lr_ep3,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "e374ddc",
|
111 |
+
"date": "0227_2140"
|
112 |
+
}
|
20250227/vsibench/0227_2125_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_13af5f/vsibench.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
20250227/vsibench/0227_2133_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ca234/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 36.41748572972608,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002bf8788b0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002bf878f70>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002bf879750>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002bf87a200>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002bf87a710>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 1544,
|
92 |
+
"effective": 1544
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_base_lora_ep3/checkpoint-385,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "e374ddc",
|
111 |
+
"date": "0227_2144"
|
112 |
+
}
|
20250227/vsibench/0227_2133_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ca234/vsibench.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
20250227/vsibench/0227_2134_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_0a55b4/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 32.436417063532275,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002d8d588b0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002d8d58f70>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002d8d59750>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002d8d5a200>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002d8d5a710>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 1544,
|
92 |
+
"effective": 1544
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_base_lora_ep3/checkpoint-192,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "e374ddc",
|
111 |
+
"date": "0227_2145"
|
112 |
+
}
|
20250227/vsibench/0227_2134_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_0a55b4/vsibench.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
20250227/vsibench/0227_2139_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c76152/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 30.28599655486878,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002c50930a0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c5093760>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002c5093d90>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002c3d3ca60>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002c3d3d2d0>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 1544,
|
92 |
+
"effective": 1544
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_ep3/checkpoint-192,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "e374ddc",
|
111 |
+
"date": "0227_2155"
|
112 |
+
}
|
20250227/vsibench/0227_2139_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c76152/vsibench.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
20250227/vsibench/0227_2143_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_62e9eb/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 30.28599655486878,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002e407b0a0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e407b760>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002e407bd90>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002e4c3ca60>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002e4c3d2d0>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 1544,
|
92 |
+
"effective": 1544
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_ep3/checkpoint-385,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "e374ddc",
|
111 |
+
"date": "0227_2159"
|
112 |
+
}
|
20250227/vsibench/0227_2143_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_62e9eb/vsibench.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
20250227/vsibench/0227_2147_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_eb18c4/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 30.28599655486878,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002c2bc70a0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c2bc7760>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002c2bc7d90>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002c377ca60>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002c377d2d0>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 1544,
|
92 |
+
"effective": 1544
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_diff_lr_ep3/checkpoint-385,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "e374ddc",
|
111 |
+
"date": "0227_2203"
|
112 |
+
}
|
20250227/vsibench/0227_2147_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_eb18c4/vsibench.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|