Fancy-MLLM committed
Commit 67ae540 · verified · 1 Parent(s): 4c063f9

Update app.py

Files changed (1)
  1. app.py +173 -87
app.py CHANGED
@@ -1,108 +1,194 @@
  import gradio as gr
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
  from threading import Thread
- from qwen_vl_utils import process_vision_info
- import torch
  import time

- # Check if a GPU is available
- device = "cuda" if torch.cuda.is_available() else "cpu"

- local_path = "Fancy-MLLM/R1-OneVision-7B"

- # Load the model on the appropriate device (GPU if available, otherwise CPU)
- model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-     local_path, torch_dtype="auto", device_map=device
- )
- processor = AutoProcessor.from_pretrained(local_path)

- def generate_output(image, text, button_click):
-     # Prepare input data
      messages = [
          {
              "role": "user",
              "content": [
-                 {"type": "image", "image": image, 'min_pixels': 1003520, 'max_pixels': 12845056},
                  {"type": "text", "text": text},
              ],
          }
      ]
-
-     # Prepare inputs for the model
-     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-     image_inputs, video_inputs = process_vision_info(messages)
      inputs = processor(
-         text=[text_input],
-         images=image_inputs,
-         videos=video_inputs,
-         padding=True,
          return_tensors="pt",
-     )
-
-     # Move inputs to the same device as the model
-     inputs = inputs.to(model.device)

      streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-     generation_kwargs = dict(
-         **inputs,
-         streamer=streamer,
-         max_new_tokens=4096,
-         top_p=0.001,
-         top_k=1,
-         temperature=0.01,
-         repetition_penalty=1.0,
-     )
-
      thread = Thread(target=model.generate, kwargs=generation_kwargs)
      thread.start()
-     generated_text = ''
-
-     try:
-         for new_text in streamer:
-             generated_text += new_text
-             yield f"‎{generated_text}"
-     except Exception as e:
-         print(f"Error: {e}")
-         yield f"Error occurred: {str(e)}"
-
- Css = """
- #output-markdown {
-     overflow-y: auto;
-     white-space: pre-wrap;
-     word-wrap: break-word;
- }
- #output-markdown .math {
-     overflow-x: auto;
-     max-width: 100%;
- }
- .markdown-text {
-     white-space: pre-wrap;
-     word-wrap: break-word;
- }
- .markdown-output {
-     min-height: 20vh;
-     max-width: 100%;
-     overflow-y: auto;
- }
- #qwen-md .katex-display { display: inline; }
- #qwen-md .katex-display>.katex { display: inline; }
- #qwen-md .katex-display>.katex>.katex-html { display: inline; }
- """
-
- with gr.Blocks(css=Css) as demo:
-     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
-
-     with gr.Row():
-         with gr.Column():
-             input_image = gr.Image(type="pil", label="Upload")  # switch back to PIL input handling
-             input_text = gr.Textbox(label="Input your question")
-             with gr.Row():
-                 clear_btn = gr.ClearButton([input_image, input_text])
-                 submit_btn = gr.Button("Submit", variant="primary")
-
-         with gr.Column():
-             output_text = gr.Markdown(elem_id="qwen-md", container=True, elem_classes="markdown-output")
-
-     submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
-
-     demo.launch(share=False)
+ # import gradio as gr
+ # from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
+ # from threading import Thread
+ # from qwen_vl_utils import process_vision_info
+ # import torch
+ # import time
+
+ # # Check if a GPU is available
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # local_path = "Fancy-MLLM/R1-OneVision-7B"
+
+ # # Load the model on the appropriate device (GPU if available, otherwise CPU)
+ # model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+ #     local_path, torch_dtype="auto", device_map=device
+ # )
+ # processor = AutoProcessor.from_pretrained(local_path)
+
+ # def generate_output(image, text, button_click):
+ #     # Prepare input data
+ #     messages = [
+ #         {
+ #             "role": "user",
+ #             "content": [
+ #                 {"type": "image", "image": image, 'min_pixels': 1003520, 'max_pixels': 12845056},
+ #                 {"type": "text", "text": text},
+ #             ],
+ #         }
+ #     ]
+
+ #     # Prepare inputs for the model
+ #     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+ #     image_inputs, video_inputs = process_vision_info(messages)
+ #     inputs = processor(
+ #         text=[text_input],
+ #         images=image_inputs,
+ #         videos=video_inputs,
+ #         padding=True,
+ #         return_tensors="pt",
+ #     )
+
+ #     # Move inputs to the same device as the model
+ #     inputs = inputs.to(model.device)
+
+ #     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+ #     generation_kwargs = dict(
+ #         **inputs,
+ #         streamer=streamer,
+ #         max_new_tokens=4096,
+ #         top_p=0.001,
+ #         top_k=1,
+ #         temperature=0.01,
+ #         repetition_penalty=1.0,
+ #     )
+
+ #     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+ #     thread.start()
+ #     generated_text = ''
+
+ #     try:
+ #         for new_text in streamer:
+ #             generated_text += new_text
+ #             yield f"‎{generated_text}"
+ #     except Exception as e:
+ #         print(f"Error: {e}")
+ #         yield f"Error occurred: {str(e)}"
+
+ # Css = """
+ # #output-markdown {
+ #     overflow-y: auto;
+ #     white-space: pre-wrap;
+ #     word-wrap: break-word;
+ # }
+ # #output-markdown .math {
+ #     overflow-x: auto;
+ #     max-width: 100%;
+ # }
+ # .markdown-text {
+ #     white-space: pre-wrap;
+ #     word-wrap: break-word;
+ # }
+ # .markdown-output {
+ #     min-height: 20vh;
+ #     max-width: 100%;
+ #     overflow-y: auto;
+ # }
+ # #qwen-md .katex-display { display: inline; }
+ # #qwen-md .katex-display>.katex { display: inline; }
+ # #qwen-md .katex-display>.katex>.katex-html { display: inline; }
+ # """
+
+ # with gr.Blocks(css=Css) as demo:
+ #     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
+
+ #     with gr.Row():
+ #         with gr.Column():
+ #             input_image = gr.Image(type="pil", label="Upload")  # switch back to PIL input handling
+ #             input_text = gr.Textbox(label="Input your question")
+ #             with gr.Row():
+ #                 clear_btn = gr.ClearButton([input_image, input_text])
+ #                 submit_btn = gr.Button("Submit", variant="primary")
+
+ #         with gr.Column():
+ #             output_text = gr.Markdown(elem_id="qwen-md", container=True, elem_classes="markdown-output")
+
+ #     submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
+
+ #     demo.launch(share=False)
+
+
  import gradio as gr
+ from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
+ from transformers.image_utils import load_image
  from threading import Thread
  import time
+ import torch
+ import spaces

+ MODEL_ID = "Fancy-MLLM/R1-OneVision-7B"
+ processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+     MODEL_ID,
+     trust_remote_code=True,
+     torch_dtype=torch.bfloat16
+ ).to("cuda").eval()

+ @spaces.GPU
+ def model_inference(input_dict, history):
+     text = input_dict["text"]
+     files = input_dict["files"]

+     # Load images if provided
+     if len(files) > 1:
+         images = [load_image(image) for image in files]
+     elif len(files) == 1:
+         images = [load_image(files[0])]
+     else:
+         images = []
+
+     # Validate input
+     if text == "" and not images:
+         gr.Error("Please input a query and optionally image(s).")
+         return
+     if text == "" and images:
+         gr.Error("Please input a text query along with the image(s).")
+         return

+     # Prepare messages for the model
      messages = [
          {
              "role": "user",
              "content": [
+                 *[{"type": "image", "image": image} for image in images],
                  {"type": "text", "text": text},
              ],
          }
      ]
+
+     # Apply chat template and process inputs
+     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
      inputs = processor(
+         text=[prompt],
+         images=images if images else None,
          return_tensors="pt",
+         padding=True,
+     ).to("cuda")

+     # Set up streamer for real-time output
      streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+
+     # Start generation in a separate thread
      thread = Thread(target=model.generate, kwargs=generation_kwargs)
      thread.start()
+
+     # Stream the output
+     buffer = ""
+     yield "Thinking..."
+     for new_text in streamer:
+         buffer += new_text
+         time.sleep(0.01)
+         yield buffer
+
+
+ demo = gr.ChatInterface(
+     fn=model_inference,
+     description="# **Fancy-MLLM/R1-OneVision-7B**",
+     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
+     stop_btn="Stop Generation",
+     multimodal=True,
+     cache_examples=False,
+ )
+
+ demo.launch(debug=True)
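
Note on the streaming loop added in this commit: `gr.ChatInterface` re-renders each yielded value as the full assistant message, which is why `model_inference` yields the growing `buffer` rather than each `new_text` delta. A minimal illustrative sketch of that pattern (the `fake_streamer` generator below is a stand-in for `TextIteratorStreamer`, not part of the commit, so it runs without the model):

```python
# Illustrative only: mimics the cumulative-yield pattern used by model_inference.
def fake_streamer():
    # Stand-in for TextIteratorStreamer, which yields decoded text chunks.
    yield from ["The ", "answer ", "is ", "42."]

buffer = ""
for new_text in fake_streamer():
    buffer += new_text
    print(buffer)  # the UI would re-render this cumulative string on each yield
```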