WangBiao committed on
Commit
b304f64
·
verified ·
1 Parent(s): b0c76bc

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +70 -3
README.md CHANGED
@@ -1,3 +1,70 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ datasets:
4
+ - WangBiao/R1-Track-5k
5
+ base_model:
6
+ - Qwen/Qwen2.5-VL-3B-Instruct
7
+ ---
8
+
9
+ # Demo
10
+ ```python
11
# Minimal inference demo for the R1-Track-GRPO-wo-Think tracker (a
# Qwen2.5-VL fine-tune): given a target bounding box in the first frame,
# the model localizes the same target in the second frame.
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

# Load the checkpoint; device_map="auto" places layers on available devices.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "WangBiao/R1-Track-GRPO-wo-Think", torch_dtype="auto", device_map="auto"
)

# Pin the visual token budget: min == max forces every frame to 336x336 pixels.
min_pixels = 336 * 336
max_pixels = 336 * 336
processor = AutoProcessor.from_pretrained(
    "WangBiao/R1-Track-GRPO-wo-Think", min_pixels=min_pixels, max_pixels=max_pixels
)

# Chat-style request: two frames plus the tracking instruction.
messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "image_1.jpg"},
            {"type": "image", "image": "image_2.jpg"},
            {
                "type": "text",
                "text": "Please identify the target specified by the bounding box [241,66,329,154] in the first image and locate it in the second image. Return the coordinates in [x_min,y_min,x_max,y_max] format.",
            },
        ],
    },
]

# Render the chat template as text and gather the vision inputs separately.
prompt = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = process_vision_info(messages)

inputs = processor(
    text=[prompt],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
).to(model.device)

# Generate, then drop the prompt tokens so only newly produced tokens decode.
generated_ids = model.generate(**inputs, max_new_tokens=128)
trimmed_ids = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
    trimmed_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(output_text)
70
+ ```