Ertugrul
/

Qwen2.5-VL-7B-Captioner-Relaxed

Image-Text-to-Text

feature-extraction

text-generation-inference

Model card Files Files and versions Community

Ertugrul committed on Mar 22

Commit

09c7ef9

·

verified ·

1 Parent(s): 4561018

Update README.md

Files changed (1) hide show

README.md +25 -0

README.md CHANGED Viewed

@@ -43,6 +43,7 @@ from transformers import (
 )
 model_id = "Ertugrul/Qwen2.5-VL-7B-Captioner-Relaxed"
 image_path = "path/to/your/image.jpg"
@@ -57,6 +58,30 @@ model = AutoModelForImageTextToText.from_pretrained(
   attn_implementation="flash_attention_2", # Use "flash_attention_2" when running on Ampere or newer GPU or use "eager" for older GPUs
 )
 # You can adjust min_pixels and max_pixels to fit your needs, e.g. lower them to reduce compute cost at the expense of quality
 min_pixels = 256*28*28
 max_pixels = 1280*28*28

 )
 model_id = "Ertugrul/Qwen2.5-VL-7B-Captioner-Relaxed"
 image_path = "path/to/your/image.jpg"
   attn_implementation="flash_attention_2", # Use "flash_attention_2" when running on Ampere or newer GPU or use "eager" for older GPUs
 )
+#### For lower precision (GPUs with less than 12GB VRAM) ####
+# Configure 4-bit quantization using BitsAndBytesConfig
+#from transformers import BitsAndBytesConfig
+# quantization_config = BitsAndBytesConfig(
+#         load_in_4bit=True,
+#         bnb_4bit_use_double_quant=True,
+#         bnb_4bit_quant_type="nf4",
+#         bnb_4bit_compute_dtype=torch.bfloat16,
+#         bnb_4bit_quant_storage=torch.bfloat16,
+#     )
+# model = AutoModelForImageTextToText.from_pretrained(
+#     model_id,
+#     device_map="auto",
+#     torch_dtype=torch.bfloat16,
+#     attn_implementation="flash_attention_2", # Use "flash_attention_2" when running on Ampere or newer GPU or use "eager" for older GPUs
+#     quantization_config=quantization_config,  # Use BitsAndBytesConfig instead of load_in_4bit
+# )
+########################################################################
 # You can adjust min_pixels and max_pixels to fit your needs, e.g. lower them to reduce compute cost at the expense of quality
 min_pixels = 256*28*28
 max_pixels = 1280*28*28