Journey9ni committed
Commit a843a70 · verified · 1 Parent(s): 3b34502

Upload folder using huggingface_hub

Files changed (17)
  1. .gitattributes +1 -0
  2. vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/results.json +112 -0
  3. vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json +3 -0
  4. vsibench/0227_2119_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ffbe0/results.json +112 -0
  5. vsibench/0227_2119_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ffbe0/vsibench.json +0 -0
  6. vsibench/0227_2125_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_13af5f/results.json +112 -0
  7. vsibench/0227_2125_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_13af5f/vsibench.json +0 -0
  8. vsibench/0227_2133_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ca234/results.json +112 -0
  9. vsibench/0227_2133_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ca234/vsibench.json +0 -0
  10. vsibench/0227_2134_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_0a55b4/results.json +112 -0
  11. vsibench/0227_2134_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_0a55b4/vsibench.json +0 -0
  12. vsibench/0227_2139_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c76152/results.json +112 -0
  13. vsibench/0227_2139_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c76152/vsibench.json +0 -0
  14. vsibench/0227_2143_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_62e9eb/results.json +112 -0
  15. vsibench/0227_2143_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_62e9eb/vsibench.json +0 -0
  16. vsibench/0227_2147_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_eb18c4/results.json +112 -0
  17. vsibench/0227_2147_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_eb18c4/vsibench.json +0 -0
.gitattributes CHANGED
@@ -37,3 +37,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 0223_0351_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_1ad6e5/vsibench.json filter=lfs diff=lfs merge=lfs -text
 0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2229cf/vsibench.json filter=lfs diff=lfs merge=lfs -text
 0223_2055_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_3f39f5/vsibench.json filter=lfs diff=lfs merge=lfs -text
+vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json filter=lfs diff=lfs merge=lfs -text
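For reference, the added line is a standard Git LFS tracking rule. A minimal sketch (illustrative only, not part of this commit's tooling) of appending such a rule from Python; `git lfs track <path>` produces the same line:

# Append an LFS tracking rule to .gitattributes, as `git lfs track` would.
rule = "vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json filter=lfs diff=lfs merge=lfs -text\n"
with open(".gitattributes", "a") as f:
    f.write(rule)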
vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/results.json ADDED
@@ -0,0 +1,112 @@
+{
+  "results": {
+    "vsibench": {
+      "alias": "vsibench",
+      "vsibench_score,none": 33.12872055166613,
+      "vsibench_score_stderr,none": "N/A"
+    }
+  },
+  "group_subtasks": {
+    "vsibench": []
+  },
+  "configs": {
+    "vsibench": {
+      "task": "vsibench",
+      "dataset_path": "nyu-visionx/VSI-Bench",
+      "dataset_kwargs": {
+        "token": true
+      },
+      "test_split": "test",
+      "full_docs": false,
+      "process_results_use_image": false,
+      "process_docs": "<function process_docs at 0x4002c6720940>",
+      "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c6721000>",
+      "doc_to_text": "<function vsibench_doc_to_text at 0x4002c67217e0>",
+      "doc_to_target": "ground_truth",
+      "process_results": "<function vsibench_process_results at 0x4002c6722290>",
+      "description": "",
+      "target_delimiter": " ",
+      "fewshot_delimiter": "\n\n",
+      "num_fewshot": 0,
+      "metric_list": [
+        {
+          "metric": "vsibench_score",
+          "aggregation": "<function vsibench_aggregate_results at 0x4002c67227a0>",
+          "higher_is_better": true
+        }
+      ],
+      "output_type": "generate_until",
+      "generation_kwargs": {
+        "max_new_tokens": 16,
+        "temperature": 0.0,
+        "top_p": 1.0,
+        "num_beams": 1,
+        "do_sample": false,
+        "until": [
+          "\n\n"
+        ]
+      },
+      "repeats": 1,
+      "should_decontaminate": false,
+      "metadata": [
+        {
+          "version": 0.0
+        }
+      ],
+      "lmms_eval_specific_kwargs": {
+        "default": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Please answer the question using a single word or phrase."
+        },
+        "gemini_api": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "gpt4v": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "pre_prompt": "",
+        "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+        "na_post_prompt": "Please answer the question using a single word or phrase."
+      }
+    }
+  },
+  "versions": {
+    "vsibench": "Yaml"
+  },
+  "n-shot": {
+    "vsibench": 0
+  },
+  "higher_is_better": {
+    "vsibench": {
+      "vsibench_score": true
+    }
+  },
+  "n-samples": {
+    "vsibench": {
+      "original": 5130,
+      "effective": 5130
+    }
+  },
+  "config": {
+    "model": "llava_onevision",
+    "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_base_lora_ep3,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+    "batch_size": "1",
+    "batch_sizes": [],
+    "device": null,
+    "use_cache": null,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "gen_kwargs": "",
+    "random_seed": 0,
+    "numpy_seed": 1234,
+    "torch_seed": 1234,
+    "fewshot_seed": 1234
+  },
+  "git_hash": "e374ddc",
+  "date": "0227_2123"
+}
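Every results.json in this commit follows the same lmms-eval layout shown above. A minimal sketch, assuming the repository is checked out locally, for pulling the headline score back out with only the standard library:

import json

# One of the results.json files added in this commit.
path = "vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/results.json"
with open(path) as f:
    data = json.load(f)

# The headline metric sits under results.vsibench, keyed with a ",none" filter suffix.
score = data["results"]["vsibench"]["vsibench_score,none"]
n_samples = data["n-samples"]["vsibench"]["effective"]
print(f"vsibench score: {score:.2f} over {n_samples} samples")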
vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f25180c5ef080b415e72591ed1418a06ebc911b4752f37d839834f515cc706bd
+size 13369488
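The vsibench.json blob itself is a Git LFS pointer, so only the three lines above live in the git history; the ~13 MB payload is fetched by LFS. A small sketch that parses a pointer file of this shape (assumes the clone used GIT_LFS_SKIP_SMUDGE=1, so the pointer text rather than the payload is on disk):

def parse_lfs_pointer(path: str) -> dict[str, str]:
    """Parse a Git LFS pointer file (version / oid / size lines) into a dict."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer(
    "vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_2b3696/vsibench.json"
)
print(ptr["oid"], ptr["size"])  # sha256:f25180c5... 13369488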
vsibench/0227_2119_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ffbe0/results.json ADDED
@@ -0,0 +1,112 @@
+{
+  "results": {
+    "vsibench": {
+      "alias": "vsibench",
+      "vsibench_score,none": 35.480714020333025,
+      "vsibench_score_stderr,none": "N/A"
+    }
+  },
+  "group_subtasks": {
+    "vsibench": []
+  },
+  "configs": {
+    "vsibench": {
+      "task": "vsibench",
+      "dataset_path": "nyu-visionx/VSI-Bench",
+      "dataset_kwargs": {
+        "token": true
+      },
+      "test_split": "test",
+      "full_docs": false,
+      "process_results_use_image": false,
+      "process_docs": "<function process_docs at 0x4002e6f6b0a0>",
+      "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e6f6b760>",
+      "doc_to_text": "<function vsibench_doc_to_text at 0x4002e6f6bd90>",
+      "doc_to_target": "ground_truth",
+      "process_results": "<function vsibench_process_results at 0x4002e5c14a60>",
+      "description": "",
+      "target_delimiter": " ",
+      "fewshot_delimiter": "\n\n",
+      "num_fewshot": 0,
+      "metric_list": [
+        {
+          "metric": "vsibench_score",
+          "aggregation": "<function vsibench_aggregate_results at 0x4002e5c152d0>",
+          "higher_is_better": true
+        }
+      ],
+      "output_type": "generate_until",
+      "generation_kwargs": {
+        "max_new_tokens": 16,
+        "temperature": 0.0,
+        "top_p": 1.0,
+        "num_beams": 1,
+        "do_sample": false,
+        "until": [
+          "\n\n"
+        ]
+      },
+      "repeats": 1,
+      "should_decontaminate": false,
+      "metadata": [
+        {
+          "version": 0.0
+        }
+      ],
+      "lmms_eval_specific_kwargs": {
+        "default": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Please answer the question using a single word or phrase."
+        },
+        "gemini_api": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "gpt4v": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "pre_prompt": "",
+        "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+        "na_post_prompt": "Please answer the question using a single word or phrase."
+      }
+    }
+  },
+  "versions": {
+    "vsibench": "Yaml"
+  },
+  "n-shot": {
+    "vsibench": 0
+  },
+  "higher_is_better": {
+    "vsibench": {
+      "vsibench_score": true
+    }
+  },
+  "n-samples": {
+    "vsibench": {
+      "original": 1544,
+      "effective": 1544
+    }
+  },
+  "config": {
+    "model": "llava_onevision",
+    "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_ep3,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+    "batch_size": "1",
+    "batch_sizes": [],
+    "device": null,
+    "use_cache": null,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "gen_kwargs": "",
+    "random_seed": 0,
+    "numpy_seed": 1234,
+    "torch_seed": 1234,
+    "fewshot_seed": 1234
+  },
+  "git_hash": "e374ddc",
+  "date": "0227_2135"
+}
vsibench/0227_2119_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ffbe0/vsibench.json ADDED
The diff for this file is too large to render. See raw diff
 
vsibench/0227_2125_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_13af5f/results.json ADDED
@@ -0,0 +1,112 @@
+{
+  "results": {
+    "vsibench": {
+      "alias": "vsibench",
+      "vsibench_score,none": 35.378964434086534,
+      "vsibench_score_stderr,none": "N/A"
+    }
+  },
+  "group_subtasks": {
+    "vsibench": []
+  },
+  "configs": {
+    "vsibench": {
+      "task": "vsibench",
+      "dataset_path": "nyu-visionx/VSI-Bench",
+      "dataset_kwargs": {
+        "token": true
+      },
+      "test_split": "test",
+      "full_docs": false,
+      "process_results_use_image": false,
+      "process_docs": "<function process_docs at 0x4002e42670a0>",
+      "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e4267760>",
+      "doc_to_text": "<function vsibench_doc_to_text at 0x4002e4267d90>",
+      "doc_to_target": "ground_truth",
+      "process_results": "<function vsibench_process_results at 0x4002e4520a60>",
+      "description": "",
+      "target_delimiter": " ",
+      "fewshot_delimiter": "\n\n",
+      "num_fewshot": 0,
+      "metric_list": [
+        {
+          "metric": "vsibench_score",
+          "aggregation": "<function vsibench_aggregate_results at 0x4002e45212d0>",
+          "higher_is_better": true
+        }
+      ],
+      "output_type": "generate_until",
+      "generation_kwargs": {
+        "max_new_tokens": 16,
+        "temperature": 0.0,
+        "top_p": 1.0,
+        "num_beams": 1,
+        "do_sample": false,
+        "until": [
+          "\n\n"
+        ]
+      },
+      "repeats": 1,
+      "should_decontaminate": false,
+      "metadata": [
+        {
+          "version": 0.0
+        }
+      ],
+      "lmms_eval_specific_kwargs": {
+        "default": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Please answer the question using a single word or phrase."
+        },
+        "gemini_api": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "gpt4v": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "pre_prompt": "",
+        "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+        "na_post_prompt": "Please answer the question using a single word or phrase."
+      }
+    }
+  },
+  "versions": {
+    "vsibench": "Yaml"
+  },
+  "n-shot": {
+    "vsibench": 0
+  },
+  "higher_is_better": {
+    "vsibench": {
+      "vsibench_score": true
+    }
+  },
+  "n-samples": {
+    "vsibench": {
+      "original": 1544,
+      "effective": 1544
+    }
+  },
+  "config": {
+    "model": "llava_onevision",
+    "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_diff_lr_ep3,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+    "batch_size": "1",
+    "batch_sizes": [],
+    "device": null,
+    "use_cache": null,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "gen_kwargs": "",
+    "random_seed": 0,
+    "numpy_seed": 1234,
+    "torch_seed": 1234,
+    "fewshot_seed": 1234
+  },
+  "git_hash": "e374ddc",
+  "date": "0227_2140"
+}
vsibench/0227_2125_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_13af5f/vsibench.json ADDED
The diff for this file is too large to render. See raw diff
 
vsibench/0227_2133_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ca234/results.json ADDED
@@ -0,0 +1,112 @@
+{
+  "results": {
+    "vsibench": {
+      "alias": "vsibench",
+      "vsibench_score,none": 36.41748572972608,
+      "vsibench_score_stderr,none": "N/A"
+    }
+  },
+  "group_subtasks": {
+    "vsibench": []
+  },
+  "configs": {
+    "vsibench": {
+      "task": "vsibench",
+      "dataset_path": "nyu-visionx/VSI-Bench",
+      "dataset_kwargs": {
+        "token": true
+      },
+      "test_split": "test",
+      "full_docs": false,
+      "process_results_use_image": false,
+      "process_docs": "<function process_docs at 0x4002bf8788b0>",
+      "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002bf878f70>",
+      "doc_to_text": "<function vsibench_doc_to_text at 0x4002bf879750>",
+      "doc_to_target": "ground_truth",
+      "process_results": "<function vsibench_process_results at 0x4002bf87a200>",
+      "description": "",
+      "target_delimiter": " ",
+      "fewshot_delimiter": "\n\n",
+      "num_fewshot": 0,
+      "metric_list": [
+        {
+          "metric": "vsibench_score",
+          "aggregation": "<function vsibench_aggregate_results at 0x4002bf87a710>",
+          "higher_is_better": true
+        }
+      ],
+      "output_type": "generate_until",
+      "generation_kwargs": {
+        "max_new_tokens": 16,
+        "temperature": 0.0,
+        "top_p": 1.0,
+        "num_beams": 1,
+        "do_sample": false,
+        "until": [
+          "\n\n"
+        ]
+      },
+      "repeats": 1,
+      "should_decontaminate": false,
+      "metadata": [
+        {
+          "version": 0.0
+        }
+      ],
+      "lmms_eval_specific_kwargs": {
+        "default": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Please answer the question using a single word or phrase."
+        },
+        "gemini_api": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "gpt4v": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "pre_prompt": "",
+        "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+        "na_post_prompt": "Please answer the question using a single word or phrase."
+      }
+    }
+  },
+  "versions": {
+    "vsibench": "Yaml"
+  },
+  "n-shot": {
+    "vsibench": 0
+  },
+  "higher_is_better": {
+    "vsibench": {
+      "vsibench_score": true
+    }
+  },
+  "n-samples": {
+    "vsibench": {
+      "original": 1544,
+      "effective": 1544
+    }
+  },
+  "config": {
+    "model": "llava_onevision",
+    "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_base_lora_ep3/checkpoint-385,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+    "batch_size": "1",
+    "batch_sizes": [],
+    "device": null,
+    "use_cache": null,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "gen_kwargs": "",
+    "random_seed": 0,
+    "numpy_seed": 1234,
+    "torch_seed": 1234,
+    "fewshot_seed": 1234
+  },
+  "git_hash": "e374ddc",
+  "date": "0227_2144"
+}
vsibench/0227_2133_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4ca234/vsibench.json ADDED
The diff for this file is too large to render. See raw diff
 
vsibench/0227_2134_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_0a55b4/results.json ADDED
@@ -0,0 +1,112 @@
+{
+  "results": {
+    "vsibench": {
+      "alias": "vsibench",
+      "vsibench_score,none": 32.436417063532275,
+      "vsibench_score_stderr,none": "N/A"
+    }
+  },
+  "group_subtasks": {
+    "vsibench": []
+  },
+  "configs": {
+    "vsibench": {
+      "task": "vsibench",
+      "dataset_path": "nyu-visionx/VSI-Bench",
+      "dataset_kwargs": {
+        "token": true
+      },
+      "test_split": "test",
+      "full_docs": false,
+      "process_results_use_image": false,
+      "process_docs": "<function process_docs at 0x4002d8d588b0>",
+      "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002d8d58f70>",
+      "doc_to_text": "<function vsibench_doc_to_text at 0x4002d8d59750>",
+      "doc_to_target": "ground_truth",
+      "process_results": "<function vsibench_process_results at 0x4002d8d5a200>",
+      "description": "",
+      "target_delimiter": " ",
+      "fewshot_delimiter": "\n\n",
+      "num_fewshot": 0,
+      "metric_list": [
+        {
+          "metric": "vsibench_score",
+          "aggregation": "<function vsibench_aggregate_results at 0x4002d8d5a710>",
+          "higher_is_better": true
+        }
+      ],
+      "output_type": "generate_until",
+      "generation_kwargs": {
+        "max_new_tokens": 16,
+        "temperature": 0.0,
+        "top_p": 1.0,
+        "num_beams": 1,
+        "do_sample": false,
+        "until": [
+          "\n\n"
+        ]
+      },
+      "repeats": 1,
+      "should_decontaminate": false,
+      "metadata": [
+        {
+          "version": 0.0
+        }
+      ],
+      "lmms_eval_specific_kwargs": {
+        "default": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Please answer the question using a single word or phrase."
+        },
+        "gemini_api": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "gpt4v": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "pre_prompt": "",
+        "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+        "na_post_prompt": "Please answer the question using a single word or phrase."
+      }
+    }
+  },
+  "versions": {
+    "vsibench": "Yaml"
+  },
+  "n-shot": {
+    "vsibench": 0
+  },
+  "higher_is_better": {
+    "vsibench": {
+      "vsibench_score": true
+    }
+  },
+  "n-samples": {
+    "vsibench": {
+      "original": 1544,
+      "effective": 1544
+    }
+  },
+  "config": {
+    "model": "llava_onevision",
+    "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_base_lora_ep3/checkpoint-192,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+    "batch_size": "1",
+    "batch_sizes": [],
+    "device": null,
+    "use_cache": null,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "gen_kwargs": "",
+    "random_seed": 0,
+    "numpy_seed": 1234,
+    "torch_seed": 1234,
+    "fewshot_seed": 1234
+  },
+  "git_hash": "e374ddc",
+  "date": "0227_2145"
+}
vsibench/0227_2134_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_0a55b4/vsibench.json ADDED
The diff for this file is too large to render. See raw diff
 
vsibench/0227_2139_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c76152/results.json ADDED
@@ -0,0 +1,112 @@
+{
+  "results": {
+    "vsibench": {
+      "alias": "vsibench",
+      "vsibench_score,none": 30.28599655486878,
+      "vsibench_score_stderr,none": "N/A"
+    }
+  },
+  "group_subtasks": {
+    "vsibench": []
+  },
+  "configs": {
+    "vsibench": {
+      "task": "vsibench",
+      "dataset_path": "nyu-visionx/VSI-Bench",
+      "dataset_kwargs": {
+        "token": true
+      },
+      "test_split": "test",
+      "full_docs": false,
+      "process_results_use_image": false,
+      "process_docs": "<function process_docs at 0x4002c50930a0>",
+      "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c5093760>",
+      "doc_to_text": "<function vsibench_doc_to_text at 0x4002c5093d90>",
+      "doc_to_target": "ground_truth",
+      "process_results": "<function vsibench_process_results at 0x4002c3d3ca60>",
+      "description": "",
+      "target_delimiter": " ",
+      "fewshot_delimiter": "\n\n",
+      "num_fewshot": 0,
+      "metric_list": [
+        {
+          "metric": "vsibench_score",
+          "aggregation": "<function vsibench_aggregate_results at 0x4002c3d3d2d0>",
+          "higher_is_better": true
+        }
+      ],
+      "output_type": "generate_until",
+      "generation_kwargs": {
+        "max_new_tokens": 16,
+        "temperature": 0.0,
+        "top_p": 1.0,
+        "num_beams": 1,
+        "do_sample": false,
+        "until": [
+          "\n\n"
+        ]
+      },
+      "repeats": 1,
+      "should_decontaminate": false,
+      "metadata": [
+        {
+          "version": 0.0
+        }
+      ],
+      "lmms_eval_specific_kwargs": {
+        "default": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Please answer the question using a single word or phrase."
+        },
+        "gemini_api": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "gpt4v": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "pre_prompt": "",
+        "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+        "na_post_prompt": "Please answer the question using a single word or phrase."
+      }
+    }
+  },
+  "versions": {
+    "vsibench": "Yaml"
+  },
+  "n-shot": {
+    "vsibench": 0
+  },
+  "higher_is_better": {
+    "vsibench": {
+      "vsibench_score": true
+    }
+  },
+  "n-samples": {
+    "vsibench": {
+      "original": 1544,
+      "effective": 1544
+    }
+  },
+  "config": {
+    "model": "llava_onevision",
+    "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_ep3/checkpoint-192,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+    "batch_size": "1",
+    "batch_sizes": [],
+    "device": null,
+    "use_cache": null,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "gen_kwargs": "",
+    "random_seed": 0,
+    "numpy_seed": 1234,
+    "torch_seed": 1234,
+    "fewshot_seed": 1234
+  },
+  "git_hash": "e374ddc",
+  "date": "0227_2155"
+}
vsibench/0227_2139_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c76152/vsibench.json ADDED
The diff for this file is too large to render. See raw diff
 
vsibench/0227_2143_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_62e9eb/results.json ADDED
@@ -0,0 +1,112 @@
+{
+  "results": {
+    "vsibench": {
+      "alias": "vsibench",
+      "vsibench_score,none": 30.28599655486878,
+      "vsibench_score_stderr,none": "N/A"
+    }
+  },
+  "group_subtasks": {
+    "vsibench": []
+  },
+  "configs": {
+    "vsibench": {
+      "task": "vsibench",
+      "dataset_path": "nyu-visionx/VSI-Bench",
+      "dataset_kwargs": {
+        "token": true
+      },
+      "test_split": "test",
+      "full_docs": false,
+      "process_results_use_image": false,
+      "process_docs": "<function process_docs at 0x4002e407b0a0>",
+      "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e407b760>",
+      "doc_to_text": "<function vsibench_doc_to_text at 0x4002e407bd90>",
+      "doc_to_target": "ground_truth",
+      "process_results": "<function vsibench_process_results at 0x4002e4c3ca60>",
+      "description": "",
+      "target_delimiter": " ",
+      "fewshot_delimiter": "\n\n",
+      "num_fewshot": 0,
+      "metric_list": [
+        {
+          "metric": "vsibench_score",
+          "aggregation": "<function vsibench_aggregate_results at 0x4002e4c3d2d0>",
+          "higher_is_better": true
+        }
+      ],
+      "output_type": "generate_until",
+      "generation_kwargs": {
+        "max_new_tokens": 16,
+        "temperature": 0.0,
+        "top_p": 1.0,
+        "num_beams": 1,
+        "do_sample": false,
+        "until": [
+          "\n\n"
+        ]
+      },
+      "repeats": 1,
+      "should_decontaminate": false,
+      "metadata": [
+        {
+          "version": 0.0
+        }
+      ],
+      "lmms_eval_specific_kwargs": {
+        "default": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Please answer the question using a single word or phrase."
+        },
+        "gemini_api": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "gpt4v": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "pre_prompt": "",
+        "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+        "na_post_prompt": "Please answer the question using a single word or phrase."
+      }
+    }
+  },
+  "versions": {
+    "vsibench": "Yaml"
+  },
+  "n-shot": {
+    "vsibench": 0
+  },
+  "higher_is_better": {
+    "vsibench": {
+      "vsibench_score": true
+    }
+  },
+  "n-samples": {
+    "vsibench": {
+      "original": 1544,
+      "effective": 1544
+    }
+  },
+  "config": {
+    "model": "llava_onevision",
+    "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_ep3/checkpoint-385,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+    "batch_size": "1",
+    "batch_sizes": [],
+    "device": null,
+    "use_cache": null,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "gen_kwargs": "",
+    "random_seed": 0,
+    "numpy_seed": 1234,
+    "torch_seed": 1234,
+    "fewshot_seed": 1234
+  },
+  "git_hash": "e374ddc",
+  "date": "0227_2159"
+}
vsibench/0227_2143_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_62e9eb/vsibench.json ADDED
The diff for this file is too large to render. See raw diff
 
vsibench/0227_2147_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_eb18c4/results.json ADDED
@@ -0,0 +1,112 @@
+{
+  "results": {
+    "vsibench": {
+      "alias": "vsibench",
+      "vsibench_score,none": 30.28599655486878,
+      "vsibench_score_stderr,none": "N/A"
+    }
+  },
+  "group_subtasks": {
+    "vsibench": []
+  },
+  "configs": {
+    "vsibench": {
+      "task": "vsibench",
+      "dataset_path": "nyu-visionx/VSI-Bench",
+      "dataset_kwargs": {
+        "token": true
+      },
+      "test_split": "test",
+      "full_docs": false,
+      "process_results_use_image": false,
+      "process_docs": "<function process_docs at 0x4002c2bc70a0>",
+      "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c2bc7760>",
+      "doc_to_text": "<function vsibench_doc_to_text at 0x4002c2bc7d90>",
+      "doc_to_target": "ground_truth",
+      "process_results": "<function vsibench_process_results at 0x4002c377ca60>",
+      "description": "",
+      "target_delimiter": " ",
+      "fewshot_delimiter": "\n\n",
+      "num_fewshot": 0,
+      "metric_list": [
+        {
+          "metric": "vsibench_score",
+          "aggregation": "<function vsibench_aggregate_results at 0x4002c377d2d0>",
+          "higher_is_better": true
+        }
+      ],
+      "output_type": "generate_until",
+      "generation_kwargs": {
+        "max_new_tokens": 16,
+        "temperature": 0.0,
+        "top_p": 1.0,
+        "num_beams": 1,
+        "do_sample": false,
+        "until": [
+          "\n\n"
+        ]
+      },
+      "repeats": 1,
+      "should_decontaminate": false,
+      "metadata": [
+        {
+          "version": 0.0
+        }
+      ],
+      "lmms_eval_specific_kwargs": {
+        "default": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Please answer the question using a single word or phrase."
+        },
+        "gemini_api": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "gpt4v": {
+          "pre_prompt": "",
+          "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+          "na_post_prompt": "Do not response anything other than a single number!"
+        },
+        "pre_prompt": "",
+        "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+        "na_post_prompt": "Please answer the question using a single word or phrase."
+      }
+    }
+  },
+  "versions": {
+    "vsibench": "Yaml"
+  },
+  "n-shot": {
+    "vsibench": 0
+  },
+  "higher_is_better": {
+    "vsibench": {
+      "vsibench_score": true
+    }
+  },
+  "n-samples": {
+    "vsibench": {
+      "original": 1544,
+      "effective": 1544
+    }
+  },
+  "config": {
+    "model": "llava_onevision",
+    "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-02_27_cut3r_lora_diff_lr_ep3/checkpoint-385,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+    "batch_size": "1",
+    "batch_sizes": [],
+    "device": null,
+    "use_cache": null,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "gen_kwargs": "",
+    "random_seed": 0,
+    "numpy_seed": 1234,
+    "torch_seed": 1234,
+    "fewshot_seed": 1234
+  },
+  "git_hash": "e374ddc",
+  "date": "0227_2203"
+}
vsibench/0227_2147_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_eb18c4/vsibench.json ADDED
The diff for this file is too large to render. See raw diff
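The eight runs in this commit differ only in the fine-tuned checkpoint passed through model_args (base vs. cut3r LoRA variants, end-of-training vs. checkpoint-192/385). A minimal sketch, under the same local-checkout assumption as the earlier snippets, that gathers all of the scores into one ranked listing:

import json
from pathlib import Path

# Collect every results.json under vsibench/ and rank the runs by score.
rows = []
for path in sorted(Path("vsibench").glob("*/results.json")):
    data = json.loads(path.read_text())
    score = data["results"]["vsibench"]["vsibench_score,none"]
    pretrained = data["config"]["model_args"].split(",", 1)[0].removeprefix("pretrained=")
    rows.append((score, pretrained))

for score, pretrained in sorted(rows, reverse=True):
    print(f"{score:6.2f}  {pretrained}")

Note that the 0227_2109 run reports 5130 effective samples while the others report 1544, so its score is not strictly comparable to the rest.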