Journey9ni commited on
Commit
7259050
·
verified ·
1 Parent(s): bb111dd

Upload folder using huggingface_hub

Browse files
Files changed (19) hide show
  1. .gitattributes +9 -0
  2. 20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/results.json +112 -0
  3. 20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json +3 -0
  4. 20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/results.json +112 -0
  5. 20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json +3 -0
  6. 20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/results.json +112 -0
  7. 20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json +3 -0
  8. 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/results.json +112 -0
  9. 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json +3 -0
  10. 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/results.json +112 -0
  11. 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json +3 -0
  12. 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/results.json +112 -0
  13. 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json +3 -0
  14. 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/results.json +112 -0
  15. 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json +3 -0
  16. 20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/results.json +112 -0
  17. 20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json +3 -0
  18. 20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/results.json +112 -0
  19. 20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json +3 -0
.gitattributes CHANGED
@@ -43,3 +43,12 @@ vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model
43
  20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_627798/vsibench.json filter=lfs diff=lfs merge=lfs -text
44
  20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
45
  20250313/vsibench/0313_1215_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
43
  20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_627798/vsibench.json filter=lfs diff=lfs merge=lfs -text
44
  20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
45
  20250313/vsibench/0313_1215_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
46
+ 20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json filter=lfs diff=lfs merge=lfs -text
47
+ 20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json filter=lfs diff=lfs merge=lfs -text
48
+ 20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json filter=lfs diff=lfs merge=lfs -text
49
+ 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json filter=lfs diff=lfs merge=lfs -text
50
+ 20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json filter=lfs diff=lfs merge=lfs -text
51
+ 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json filter=lfs diff=lfs merge=lfs -text
52
+ 20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json filter=lfs diff=lfs merge=lfs -text
53
+ 20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json filter=lfs diff=lfs merge=lfs -text
54
+ 20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json filter=lfs diff=lfs merge=lfs -text
20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 53.61223239449813,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002bb66ca60>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002bb66d120>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002bb66dc60>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002bb66e7a0>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002bb66ea70>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_0708"
112
+ }
20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aaf6e5be8e364cfb58b0a75d4b66bd3bf92a7692af5179f691d44fa51f7ab97
3
+ size 13363855
20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 56.122387716918986,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002c7d68a60>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c7d69120>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002c7d69c60>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002c7d6a7a0>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002c7d6aa70>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_0858"
112
+ }
20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5a2f1772236230255ff7619d6a0e064c2c03b016e685aa1147081a2043323e
3
+ size 13363860
20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 54.29945296982057,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002e619f1c0>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e619f880>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002e6720160>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002e6720d30>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002e6721630>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_0937"
112
+ }
20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88b2f65c2223629dde80b722b02d5261fe99d64829064e0178b8f8b7cd8f99a
3
+ size 13363688
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 51.91108578346994,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002d6ab31c0>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002d6ab3880>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002d6e58160>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002d6e58d30>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002d6e59630>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_0940"
112
+ }
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c328b0617cfe868c1a5b5c6319c5e8aa33fe78e1f56f38cef797e386518b9733
3
+ size 13363867
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 56.559993086996506,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002b6c50a60>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002b6c51120>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002b6c51c60>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002b6c527a0>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002b6c52a70>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_0923"
112
+ }
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b00fce1f7f47b0bf3f08741c5f4075dab9c9ef8948c36a5ab4f3cc4ed75d605
3
+ size 13363948
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 56.38881021576546,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002c19631c0>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c1963880>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002c2150160>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002c2150d30>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002c2151630>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_1207"
112
+ }
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c24a29e8778b25d2978701148210e1664b2e8e7dc6b122c13a853a143a88ce90
3
+ size 13363704
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 58.30556075895211,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002eb26b1c0>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002eb26b880>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002eb720160>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002eb720d30>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002eb721630>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_1208"
112
+ }
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d8393c30f75d0f80d454be88a5da62050adedbe979c05b49abb80c028c23bf
3
+ size 13363849
20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 58.6854936357191,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002b87d31c0>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002b87d3880>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002b7364160>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002b7364d30>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002b7365630>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_1539"
112
+ }
20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be787f64c3844cfa41bdba4f70ddfe5e2dd808c08cb5e855fd76e3a9f779cc6
3
+ size 13363781
20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/results.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "vsibench": {
4
+ "alias": "vsibench",
5
+ "vsibench_score,none": 57.88343514494699,
6
+ "vsibench_score_stderr,none": "N/A"
7
+ }
8
+ },
9
+ "group_subtasks": {
10
+ "vsibench": []
11
+ },
12
+ "configs": {
13
+ "vsibench": {
14
+ "task": "vsibench",
15
+ "dataset_path": "nyu-visionx/VSI-Bench",
16
+ "dataset_kwargs": {
17
+ "token": true
18
+ },
19
+ "test_split": "test",
20
+ "full_docs": false,
21
+ "process_results_use_image": false,
22
+ "process_docs": "<function process_docs at 0x4002f20ef1c0>",
23
+ "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002f20ef880>",
24
+ "doc_to_text": "<function vsibench_doc_to_text at 0x4002f2b94160>",
25
+ "doc_to_target": "ground_truth",
26
+ "process_results": "<function vsibench_process_results at 0x4002f2b94d30>",
27
+ "description": "",
28
+ "target_delimiter": " ",
29
+ "fewshot_delimiter": "\n\n",
30
+ "num_fewshot": 0,
31
+ "metric_list": [
32
+ {
33
+ "metric": "vsibench_score",
34
+ "aggregation": "<function vsibench_aggregate_results at 0x4002f2b95630>",
35
+ "higher_is_better": true
36
+ }
37
+ ],
38
+ "output_type": "generate_until",
39
+ "generation_kwargs": {
40
+ "max_new_tokens": 16,
41
+ "temperature": 0.0,
42
+ "top_p": 1.0,
43
+ "num_beams": 1,
44
+ "do_sample": false,
45
+ "until": [
46
+ "\n\n"
47
+ ]
48
+ },
49
+ "repeats": 1,
50
+ "should_decontaminate": false,
51
+ "metadata": [
52
+ {
53
+ "version": 0.0
54
+ }
55
+ ],
56
+ "lmms_eval_specific_kwargs": {
57
+ "default": {
58
+ "pre_prompt": "",
59
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
60
+ "na_post_prompt": "Please answer the question using a single word or phrase."
61
+ },
62
+ "gemini_api": {
63
+ "pre_prompt": "",
64
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
65
+ "na_post_prompt": "Do not response anything other than a single number!"
66
+ },
67
+ "gpt4v": {
68
+ "pre_prompt": "",
69
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
70
+ "na_post_prompt": "Do not response anything other than a single number!"
71
+ },
72
+ "pre_prompt": "",
73
+ "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
74
+ "na_post_prompt": "Please answer the question using a single word or phrase."
75
+ }
76
+ }
77
+ },
78
+ "versions": {
79
+ "vsibench": "Yaml"
80
+ },
81
+ "n-shot": {
82
+ "vsibench": 0
83
+ },
84
+ "higher_is_better": {
85
+ "vsibench": {
86
+ "vsibench_score": true
87
+ }
88
+ },
89
+ "n-samples": {
90
+ "vsibench": {
91
+ "original": 5130,
92
+ "effective": 5130
93
+ }
94
+ },
95
+ "config": {
96
+ "model": "llava_onevision",
97
+ "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
98
+ "batch_size": "1",
99
+ "batch_sizes": [],
100
+ "device": null,
101
+ "use_cache": null,
102
+ "limit": null,
103
+ "bootstrap_iters": 100000,
104
+ "gen_kwargs": "",
105
+ "random_seed": 0,
106
+ "numpy_seed": 1234,
107
+ "torch_seed": 1234,
108
+ "fewshot_seed": 1234
109
+ },
110
+ "git_hash": "2024013",
111
+ "date": "0326_1545"
112
+ }
20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:557fca3e71233278a98ae8bd0eb0068f2eb04028eed56b2f8c74e7002817aab2
3
+ size 13363872