Journey9ni commited on Mar 27

Commit

7259050

verified ·

1 Parent(s): bb111dd

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

.gitattributes +9 -0
20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/results.json +112 -0
20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json +3 -0
20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/results.json +112 -0
20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json +3 -0
20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/results.json +112 -0
20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json +3 -0
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/results.json +112 -0
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json +3 -0
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/results.json +112 -0
20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json +3 -0
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/results.json +112 -0
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json +3 -0
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/results.json +112 -0
20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json +3 -0
20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/results.json +112 -0
20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json +3 -0
20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/results.json +112 -0
20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json +3 -0

.gitattributes CHANGED Viewed

@@ -43,3 +43,12 @@ vsibench/0227_2109_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model
 20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_627798/vsibench.json filter=lfs diff=lfs merge=lfs -text
 20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
 20250313/vsibench/0313_1215_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text

 20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_627798/vsibench.json filter=lfs diff=lfs merge=lfs -text
 20250313/vsibench/0313_1149_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
 20250313/vsibench/0313_1215_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_b9e55f/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json filter=lfs diff=lfs merge=lfs -text
+20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json filter=lfs diff=lfs merge=lfs -text

20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 53.61223239449813,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002bb66ca60>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002bb66d120>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002bb66dc60>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002bb66e7a0>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002bb66ea70>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_0708"
+}

20250326/vsibench/0326_0655_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_937d1d/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5aaf6e5be8e364cfb58b0a75d4b66bd3bf92a7692af5179f691d44fa51f7ab97
+size 13363855

20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 56.122387716918986,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002c7d68a60>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c7d69120>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002c7d69c60>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002c7d6a7a0>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002c7d6aa70>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_0858"
+}

20250326/vsibench/0326_0845_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_92457b/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d5a2f1772236230255ff7619d6a0e064c2c03b016e685aa1147081a2043323e
+size 13363860

20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 54.29945296982057,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002e619f1c0>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002e619f880>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002e6720160>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002e6720d30>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002e6721630>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_0937"
+}

20250326/vsibench/0326_0907_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_c823c4/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e88b2f65c2223629dde80b722b02d5261fe99d64829064e0178b8f8b7cd8f99a
+size 13363688

20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 51.91108578346994,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002d6ab31c0>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002d6ab3880>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002d6e58160>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002d6e58d30>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002d6e59630>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-700,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_0940"
+}

20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_457ee9/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c328b0617cfe868c1a5b5c6319c5e8aa33fe78e1f56f38cef797e386518b9733
+size 13363867

20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 56.559993086996506,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002b6c50a60>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002b6c51120>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002b6c51c60>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002b6c527a0>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002b6c52a70>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_base_stage2/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_0923"
+}

20250326/vsibench/0326_0910_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_81b450/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b00fce1f7f47b0bf3f08741c5f4075dab9c9ef8948c36a5ab4f3cc4ed75d605
+size 13363948

20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 56.38881021576546,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002c19631c0>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002c1963880>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002c2150160>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002c2150d30>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002c2151630>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_1207"
+}

20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_4cd8ab/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c24a29e8778b25d2978701148210e1664b2e8e7dc6b122c13a853a143a88ce90
+size 13363704

20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 58.30556075895211,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002eb26b1c0>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002eb26b880>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002eb720160>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002eb720d30>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002eb721630>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-1400,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_1208"
+}

20250326/vsibench/0326_1137_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_ac8a7c/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50d8393c30f75d0f80d454be88a5da62050adedbe979c05b49abb80c028c23bf
+size 13363849

20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 58.6854936357191,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002b87d31c0>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002b87d3880>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002b7364160>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002b7364d30>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002b7365630>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_mlp/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_1539"
+}

20250326/vsibench/0326_1508_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_5e02ce/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6be787f64c3844cfa41bdba4f70ddfe5e2dd808c08cb5e855fd76e3a9f779cc6
+size 13363781

20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/results.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+    "results": {
+        "vsibench": {
+            "alias": "vsibench",
+            "vsibench_score,none": 57.88343514494699,
+            "vsibench_score_stderr,none": "N/A"
+        }
+    },
+    "group_subtasks": {
+        "vsibench": []
+    },
+    "configs": {
+        "vsibench": {
+            "task": "vsibench",
+            "dataset_path": "nyu-visionx/VSI-Bench",
+            "dataset_kwargs": {
+                "token": true
+            },
+            "test_split": "test",
+            "full_docs": false,
+            "process_results_use_image": false,
+            "process_docs": "<function process_docs at 0x4002f20ef1c0>",
+            "doc_to_visual": "<function vsibench_doc_to_visual at 0x4002f20ef880>",
+            "doc_to_text": "<function vsibench_doc_to_text at 0x4002f2b94160>",
+            "doc_to_target": "ground_truth",
+            "process_results": "<function vsibench_process_results at 0x4002f2b94d30>",
+            "description": "",
+            "target_delimiter": " ",
+            "fewshot_delimiter": "\n\n",
+            "num_fewshot": 0,
+            "metric_list": [
+                {
+                    "metric": "vsibench_score",
+                    "aggregation": "<function vsibench_aggregate_results at 0x4002f2b95630>",
+                    "higher_is_better": true
+                }
+            ],
+            "output_type": "generate_until",
+            "generation_kwargs": {
+                "max_new_tokens": 16,
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "num_beams": 1,
+                "do_sample": false,
+                "until": [
+                    "\n\n"
+                ]
+            },
+            "repeats": 1,
+            "should_decontaminate": false,
+            "metadata": [
+                {
+                    "version": 0.0
+                }
+            ],
+            "lmms_eval_specific_kwargs": {
+                "default": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Please answer the question using a single word or phrase."
+                },
+                "gemini_api": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "gpt4v": {
+                    "pre_prompt": "",
+                    "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                    "na_post_prompt": "Do not response anything other than a single number!"
+                },
+                "pre_prompt": "",
+                "mca_post_prompt": "Answer with the option's letter from the given choices directly.",
+                "na_post_prompt": "Please answer the question using a single word or phrase."
+            }
+        }
+    },
+    "versions": {
+        "vsibench": "Yaml"
+    },
+    "n-shot": {
+        "vsibench": 0
+    },
+    "higher_is_better": {
+        "vsibench": {
+            "vsibench_score": true
+        }
+    },
+    "n-samples": {
+        "vsibench": {
+            "original": 5130,
+            "effective": 5130
+        }
+    },
+    "config": {
+        "model": "llava_onevision",
+        "model_args": "pretrained=/scratch/08367/zhiwen/work_dirs_auto_eval/llavanext-google_siglip-so400m-patch14-384-Qwen_Qwen2-7B-Instruct-03_26_cut3r_stage2_cross_attn/checkpoint-2100,attn_implementation=flash_attention_2,conv_template=qwen_1_5,model_name=llava_qwen_lora,max_frames_num=32,model_base=LLaVA-NeXT/checkpoints/LLaVA-Video-7B-Qwen2",
+        "batch_size": "1",
+        "batch_sizes": [],
+        "device": null,
+        "use_cache": null,
+        "limit": null,
+        "bootstrap_iters": 100000,
+        "gen_kwargs": "",
+        "random_seed": 0,
+        "numpy_seed": 1234,
+        "torch_seed": 1234,
+        "fewshot_seed": 1234
+    },
+    "git_hash": "2024013",
+    "date": "0326_1545"
+}

20250326/vsibench/0326_1515_llava_one_vision_llava_qwen_lora_ov_32f_llava_onevision_model_args_e62631/vsibench.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:557fca3e71233278a98ae8bd0eb0068f2eb04028eed56b2f8c74e7002817aab2
+size 13363872