{"embed_dim": 512, "vision_cfg": {"image_size": 224, "layers": 12, "width": 768, "patch_size": 32}, "text_cfg": {"hf_model_name": "xlm-roberta-base", "hf_tokenizer_name": "xlm-roberta-base", "hf_pooler_type": "mean_pooler"}}