{ "_name_or_path": "DeepGlint-AI/MLCD-Seg", "add_faster_video": false, "add_time_instruction": false, "architectures": [ "MLCDSegForCausalLM" ], "auto_map": { "AutoConfig": "mlcd_seg.MLCDSegConfig", "AutoModel": "mlcd_seg.MLCDSegForCausalLM", "AutoModelForCausalLM": "mlcd_seg.MLCDSegForCausalLM" }, "attn_implementation": "flash_attention_2", "attention_dropout": 0.0, "bos_token_id": 151643, "eos_token_id": 151645, "faster_token_stride": 10, "force_sample": false, "hidden_act": "silu", "hidden_size": 3584, "image_aspect_ratio": "anyres", "image_crop_resolution": null, "image_grid_pinpoints": [ [ 336, 336 ], [ 336, 672 ], [ 336, 1008 ], [ 336, 1344 ], [ 336, 1680 ], [ 336, 2016 ], [ 672, 336 ], [ 672, 672 ], [ 672, 1008 ], [ 672, 1344 ], [ 672, 1680 ], [ 672, 2016 ], [ 1008, 336 ], [ 1008, 672 ], [ 1008, 1008 ], [ 1008, 1344 ], [ 1008, 1680 ], [ 1008, 2016 ], [ 1344, 336 ], [ 1344, 672 ], [ 1344, 1008 ], [ 1344, 1344 ], [ 1344, 1680 ], [ 1344, 2016 ], [ 1680, 336 ], [ 1680, 672 ], [ 1680, 1008 ], [ 1680, 1344 ], [ 1680, 1680 ], [ 1680, 2016 ], [ 2016, 336 ], [ 2016, 672 ], [ 2016, 1008 ], [ 2016, 1344 ], [ 2016, 1680 ], [ 2016, 2016 ] ], "image_split_resolution": null, "initializer_range": 0.02, "intermediate_size": 18944, "max_position_embeddings": 32768, "max_window_layers": 28, "mm_hidden_size": 1024, "mm_newline_position": "grid", "mm_patch_merge_type": "spatial_unpad", "mm_projector_lr": null, "mm_projector_type": "mlp2x_gelu", "mm_resampler_type": null, "mm_spatial_pool_mode": "bilinear", "mm_spatial_pool_stride": null, "mm_tunable_parts": "mm_vision_tower,mm_mlp_adapter,mm_language_model,sam", "mm_use_im_patch_token": false, "mm_use_im_start_end": false, "mm_vision_select_feature": "patch", "mm_vision_select_layer": -2, "mm_vision_tower_lr": 2e-06, "vision_tower_config": { "_name_or_path": "", "architectures": [ "CLIPVisionModel" ], "attention_dropout": 0.0, "hidden_act": "quick_gelu", "hidden_size": 1024, "image_size": 336, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 4096, "layer_norm_eps": 1e-05, "model_type": "clip_vision_model", "num_attention_heads": 16, "num_channels": 3, "num_hidden_layers": 24, "patch_size": 14, "projection_dim": 1024, "torch_dtype": "float32", "transformers_version": "4.44.0" }, "vision_tower_processor": { "crop_size": 336, "do_center_crop": true, "do_normalize": true, "do_resize": true, "feature_extractor_type": "CLIPFeatureExtractor", "image_mean": [ 0.48145466, 0.4578275, 0.40821073 ], "image_std": [ 0.26862954, 0.26130258, 0.27577711 ], "resample": 3, "size": 336 }, "model_type": "qwen2", "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "pos_skipping_range": 4096, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "tokenizer_model_max_length": 32768, "tokenizer_padding_side": "right", "torch_dtype": "bfloat16", "transformers_version": "4.47.0", "use_cache": true, "use_mm_proj": true, "use_pos_skipping": false, "use_sliding_window": false, "vision_tower_pretrained": null, "vocab_size": 151666 }