{ "architectures": [ "SiglipForImageClassification" ], "id2label": { "0": "brick", "1": "carpet", "2": "ceramic", "3": "fabric", "4": "foliage", "5": "food", "6": "glass", "7": "hair", "8": "leather", "9": "metal", "10": "mirror", "11": "other", "12": "painted", "13": "paper", "14": "plastic", "15": "polishedstone", "16": "skin", "17": "sky", "18": "stone", "19": "tile", "20": "wallpaper", "21": "water", "22": "wood" }, "initializer_factor": 1.0, "label2id": { "brick": 0, "carpet": 1, "ceramic": 2, "fabric": 3, "foliage": 4, "food": 5, "glass": 6, "hair": 7, "leather": 8, "metal": 9, "mirror": 10, "other": 11, "painted": 12, "paper": 13, "plastic": 14, "polishedstone": 15, "skin": 16, "sky": 17, "stone": 18, "tile": 19, "wallpaper": 20, "water": 21, "wood": 22 }, "model_type": "siglip", "problem_type": "single_label_classification", "text_config": { "attention_dropout": 0.0, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 768, "intermediate_size": 3072, "layer_norm_eps": 1e-06, "max_position_embeddings": 64, "model_type": "siglip_text_model", "num_attention_heads": 12, "num_hidden_layers": 12, "projection_size": 768, "torch_dtype": "float32", "vocab_size": 256000 }, "torch_dtype": "float32", "transformers_version": "4.50.3", "vision_config": { "attention_dropout": 0.0, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 768, "image_size": 224, "intermediate_size": 3072, "layer_norm_eps": 1e-06, "model_type": "siglip_vision_model", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "patch_size": 16, "torch_dtype": "float32" } }