Upload folder using huggingface_hub
Browse files- .gitattributes +9 -0
- 20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/results.json +112 -0
- 20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json +3 -0
- 20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/results.json +112 -0
- 20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json +3 -0
- 20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/results.json +112 -0
- 20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json +3 -0
- 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/results.json +112 -0
- 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json +3 -0
- 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/results.json +112 -0
- 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json +3 -0
- 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/results.json +112 -0
- 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json +3 -0
- 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/results.json +112 -0
- 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json +3 -0
- 20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/results.json +112 -0
- 20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json +3 -0
- 20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/results.json +112 -0
- 20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json +3 -0
.gitattributes
CHANGED
@@ -43,3 +43,12 @@ vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model
|
|
43 |
20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_627798/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
44 |
20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
45 |
20250313/vsibench/0313_1215_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_627798/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
44 |
20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
45 |
20250313/vsibench/0313_1215_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
46 |
+
20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
47 |
+
20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
48 |
+
20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
49 |
+
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
50 |
+
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
51 |
+
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
52 |
+
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
53 |
+
20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
54 |
+
20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json filter=lfs diff=lfs merge=lfs -text
|
20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 53.61223239449813,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002bb66ca60>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002bb66d120>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002bb66dc60>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002bb66e7a0>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002bb66ea70>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_0708"
|
112 |
+
}
|
20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5aaf6e5be8e364cfb58b0a75d4b66bd3bf92a7692af5179f691d44fa51f7ab97
|
3 |
+
size 13363855
|
20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 56.122387716918986,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002c7d68a60>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c7d69120>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002c7d69c60>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002c7d6a7a0>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002c7d6aa70>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_0858"
|
112 |
+
}
|
20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d5a2f1772236230255ff7619d6a0e064c2c03b016e685aa1147081a2043323e
|
3 |
+
size 13363860
|
20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 54.29945296982057,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002e619f1c0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e619f880>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002e6720160>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002e6720d30>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002e6721630>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_0937"
|
112 |
+
}
|
20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e88b2f65c2223629dde80b722b02d5261fe99d64829064e0178b8f8b7cd8f99a
|
3 |
+
size 13363688
|
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 51.91108578346994,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002d6ab31c0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002d6ab3880>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002d6e58160>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002d6e58d30>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002d6e59630>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_0940"
|
112 |
+
}
|
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c328b0617cfe868c1a5b5c6319c5e8aa33fe78e1f56f38cef797e386518b9733
|
3 |
+
size 13363867
|
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 56.559993086996506,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002b6c50a60>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002b6c51120>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002b6c51c60>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002b6c527a0>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002b6c52a70>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_0923"
|
112 |
+
}
|
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b00fce1f7f47b0bf3f08741c5f4075dab9c9ef8948c36a5ab4f3cc4ed75d605
|
3 |
+
size 13363948
|
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 56.38881021576546,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002c19631c0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c1963880>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002c2150160>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002c2150d30>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002c2151630>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_1207"
|
112 |
+
}
|
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c24a29e8778b25d2978701148210e1664b2e8e7dc6b122c13a853a143a88ce90
|
3 |
+
size 13363704
|
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 58.30556075895211,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002eb26b1c0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002eb26b880>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002eb720160>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002eb720d30>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002eb721630>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_1208"
|
112 |
+
}
|
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50d8393c30f75d0f80d454be88a5da62050adedbe979c05b49abb80c028c23bf
|
3 |
+
size 13363849
|
20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 58.6854936357191,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002b87d31c0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002b87d3880>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002b7364160>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002b7364d30>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002b7365630>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_1539"
|
112 |
+
}
|
20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6be787f64c3844cfa41bdba4f70ddfe5e2dd808c08cb5e855fd76e3a9f779cc6
|
3 |
+
size 13363781
|
20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/results.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"vsibench": {
|
4 |
+
"alias": "vsibench",
|
5 |
+
"vsibench_score,none": 57.88343514494699,
|
6 |
+
"vsibench_score_stderr,none": "N/A"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"vsibench": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"vsibench": {
|
14 |
+
"task": "vsibench",
|
15 |
+
"dataset_path": "nyu-visionx/VSI-Bench",
|
16 |
+
"dataset_kwargs": {
|
17 |
+
"token": true
|
18 |
+
},
|
19 |
+
"test_split": "test",
|
20 |
+
"full_docs": false,
|
21 |
+
"process_results_use_image": false,
|
22 |
+
"process_docs": "<function process_docs at 0x4002f20ef1c0>",
|
23 |
+
"doc_to_visual": "<function vsibench_doc_to_visual at 0x4002f20ef880>",
|
24 |
+
"doc_to_text": "<function vsibench_doc_to_text at 0x4002f2b94160>",
|
25 |
+
"doc_to_target": "ground_truth",
|
26 |
+
"process_results": "<function vsibench_process_results at 0x4002f2b94d30>",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 0,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "vsibench_score",
|
34 |
+
"aggregation": "<function vsibench_aggregate_results at 0x4002f2b95630>",
|
35 |
+
"higher_is_better": true
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"output_type": "generate_until",
|
39 |
+
"generation_kwargs": {
|
40 |
+
"max_new_tokens": 16,
|
41 |
+
"temperature": 0.0,
|
42 |
+
"top_p": 1.0,
|
43 |
+
"num_beams": 1,
|
44 |
+
"do_sample": false,
|
45 |
+
"until": [
|
46 |
+
"\n\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
"repeats": 1,
|
50 |
+
"should_decontaminate": false,
|
51 |
+
"metadata": [
|
52 |
+
{
|
53 |
+
"version": 0.0
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"lmms_eval_specific_kwargs": {
|
57 |
+
"default": {
|
58 |
+
"pre_prompt": "",
|
59 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
60 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
61 |
+
},
|
62 |
+
"gemini_api": {
|
63 |
+
"pre_prompt": "",
|
64 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
65 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
66 |
+
},
|
67 |
+
"gpt4v": {
|
68 |
+
"pre_prompt": "",
|
69 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
70 |
+
"na_post_prompt": "Do not response anything other than a single number!"
|
71 |
+
},
|
72 |
+
"pre_prompt": "",
|
73 |
+
"mca_post_prompt": "Answer with the option's letter from the given choices directly.",
|
74 |
+
"na_post_prompt": "Please answer the question using a single word or phrase."
|
75 |
+
}
|
76 |
+
}
|
77 |
+
},
|
78 |
+
"versions": {
|
79 |
+
"vsibench": "Yaml"
|
80 |
+
},
|
81 |
+
"n-shot": {
|
82 |
+
"vsibench": 0
|
83 |
+
},
|
84 |
+
"higher_is_better": {
|
85 |
+
"vsibench": {
|
86 |
+
"vsibench_score": true
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"n-samples": {
|
90 |
+
"vsibench": {
|
91 |
+
"original": 5130,
|
92 |
+
"effective": 5130
|
93 |
+
}
|
94 |
+
},
|
95 |
+
"config": {
|
96 |
+
"model": "llava_onevision",
|
97 |
+
"model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
|
98 |
+
"batch_size": "1",
|
99 |
+
"batch_sizes": [],
|
100 |
+
"device": null,
|
101 |
+
"use_cache": null,
|
102 |
+
"limit": null,
|
103 |
+
"bootstrap_iters": 100000,
|
104 |
+
"gen_kwargs": "",
|
105 |
+
"random_seed": 0,
|
106 |
+
"numpy_seed": 1234,
|
107 |
+
"torch_seed": 1234,
|
108 |
+
"fewshot_seed": 1234
|
109 |
+
},
|
110 |
+
"git_hash": "2024013",
|
111 |
+
"date": "0326_1545"
|
112 |
+
}
|
20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:557fca3e71233278a98ae8bd0eb0068f2eb04028eed56b2f8c74e7002817aab2
|
3 |
+
size 13363872
|