{
  "model_cfg": {
    "embed_dim": 768,
    "vision_cfg": {
      "image_size": 336,
      "layers": 24,
      "width": 1024,
      "patch_size": 14,
      "no_ln_pre": true,
      "pool_type": "avg",
      "final_ln_after_pool": true
    },
    "text_cfg": {
      "context_length": 128,
      "vocab_size": 32000,
      "vocab_path": "/home/zwang615/code/open_source/Double_Visual_Defense/CLIP_benchmark/clip_benchmark/open_clip/bert_base_vocab_bos_eos.txt",
      "width": 768,
      "heads": 12,
      "layers": 12,
      "pool_type": "last",
      "no_causal_mask": false
    }
  },
  "preprocess_cfg": {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225],
    "interpolation": "bilinear",
    "resize_mode": "squash"
  }
}