Feature Extraction
Transformers
Safetensors
English
Chinese
emova
Omni-modal-LLM
Multi-modal-LLM
Emotional-spoken-dialogue
custom_code
Eval Results
File size: 753 Bytes
88957dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
{
  "auto_map": {
    "AutoImageProcessor": "image_processing_emova.EMOVAImageProcessor",
    "AutoProcessor": "processing_emova.EMOVAProcessor"
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_pad": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "image_processor_type": "EMOVAImageProcessor",
  "image_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "max_pixels": 3211264,
  "merge_size": 2,
  "min_pixels": 3136,
  "patch_size": 14,
  "processor_class": "EMOVAProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "max_pixels": 3211264,
    "min_pixels": 3136
  },
  "temporal_patch_size": 2
}