{
  "_attn_implementation_autoset": true,
  "apply_sampling": true,
  "architectures": [
    "STIBPriorExtractor"
  ],
  "audio_in_channel": 8,
  "audio_patch_size": [
    2,
    2
  ],
  "dropout": 0.0,
  "enable_flash_attn": true,
  "enable_layernorm_kernel": true,
  "encode_va": false,
  "feedforward_scale": 4,
  "hidden_size": 512,
  "imagebind_ckpt_path": "./checkpoints",
  "model_type": "STIBPriorExtractor",
  "nhead": 4,
  "num_decoder_layers": 4,
  "num_encoder_layers": 4,
  "out_dim": 128,
  "pred_onset": false,
  "qk_norm": true,
  "spatial_token_num": 32,
  "temporal_token_num": 32,
  "text_emb_dim": 1024,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "va_drop_path": 0.0,
  "va_mlp_ratio": 4.0,
  "va_num_heads": 4,
  "video_in_channel": 4,
  "video_input_sq_size": 512,
  "video_patch_size": [
    1,
    2,
    2
  ]
}