---
license: mit
---
# **Phi-3.5 Vision OpenVINO INT4 Model**
<b><span style="text-decoration:underline">Note: This is an unofficial version, intended only for testing and development.</span></b>

This is an OpenVINO INT4 quantized version of Microsoft Phi-3.5 Vision. You can run the following script to convert the model:
```python
import requests
from pathlib import Path

# Download the helper scripts from the OpenVINO notebooks repository
if not Path("ov_phi3_vision.py").exists():
    r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/phi-3-vision/ov_phi3_vision.py")
    open("ov_phi3_vision.py", "w").write(r.text)

if not Path("gradio_helper.py").exists():
    r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/phi-3-vision/gradio_helper.py")
    open("gradio_helper.py", "w").write(r.text)

if not Path("notebook_utils.py").exists():
    r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py")
    open("notebook_utils.py", "w").write(r.text)

from ov_phi3_vision import convert_phi3_model
import nncf

model_id = "microsoft/Phi-3.5-vision-instruct"
out_dir = Path("Save Your Phi-3.5-vision OpenVINO INT4 PATH")

# INT4 symmetric weight compression: weights are quantized in groups of 64,
# with 60% of eligible layers compressed to INT4 (the remainder stay in INT8)
compression_configuration = {
    "mode": nncf.CompressWeightsMode.INT4_SYM,
    "group_size": 64,
    "ratio": 0.6,
}

convert_phi3_model(model_id, out_dir, compression_configuration)
```
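The `compression_configuration` controls the NNCF weight-compression trade-off: `group_size` sets the quantization block size and `ratio` sets the fraction of layers compressed to INT4. As a rough sketch only (these values are illustrative assumptions, not the tested recipe for this model), a more aggressive configuration could look like this:

```python
import nncf

# Illustrative alternative only: asymmetric INT4 with a higher INT4 ratio.
# A larger ratio yields a smaller model but may reduce accuracy; these
# values are assumptions for demonstration, not the published configuration.
aggressive_configuration = {
    "mode": nncf.CompressWeightsMode.INT4_ASYM,
    "group_size": 64,
    "ratio": 0.8,
}
```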
## **Sample Code**
```python
from pathlib import Path
from PIL import Image
from transformers import AutoProcessor, TextStreamer

from ov_phi3_vision import OvPhi3Vision
from notebook_utils import device_widget

# Pick an inference device (NPU is excluded for this model)
device = device_widget(default="GPU", exclude=["NPU"])

out_dir = Path("Your Phi-3.5-vision OpenVINO INT4 PATH")
model = OvPhi3Vision(out_dir, device.value)

image = Image.open(r"Your local image Path")

# "<|image_1|>" is the placeholder that binds the image to the prompt
messages = [
    {"role": "user", "content": "<|image_1|>\nPlease analyze the image"},
]

processor = AutoProcessor.from_pretrained(out_dir, trust_remote_code=True)
prompt = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(prompt, [image], return_tensors="pt")

# Stream tokens to stdout as they are generated
generation_args = {
    "max_new_tokens": 500,
    "do_sample": False,
    "streamer": TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True),
}

print("Analyze:")
generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args)
```
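With the `TextStreamer` the response is printed as it is generated. If you would rather capture the text in a variable, a minimal sketch (assuming the standard `transformers` decoding pattern, and reusing `model`, `processor`, and `inputs` from the sample above) is to drop the streamer and decode the returned IDs:

```python
# Sketch: decode the full response instead of streaming it.
# Assumes `model`, `processor`, and `inputs` from the sample above.
generate_ids = model.generate(
    **inputs,
    eos_token_id=processor.tokenizer.eos_token_id,
    max_new_tokens=500,
    do_sample=False,
)
# Strip the prompt tokens before decoding so only the answer remains
response = processor.batch_decode(
    generate_ids[:, inputs["input_ids"].shape[1]:],
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)[0]
print(response)
```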