Fancy-MLLM committed
Commit 4c6ecb5 · verified · 1 Parent(s): 7e3ed30

Update app.py

Files changed (1)
  1. app.py +119 -22
app.py CHANGED
@@ -108,15 +108,104 @@
  # demo.launch(share=False)
 
 
+ # import gradio as gr
+ # from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
+ # from transformers.image_utils import load_image
+ # from threading import Thread
+ # import time
+ # import torch
+ # import spaces
+
+ # MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
+ # processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+ # model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+ #     MODEL_ID,
+ #     trust_remote_code=True,
+ #     torch_dtype=torch.bfloat16
+ # ).to("cuda").eval()
+
+ # @spaces.GPU(duration=200)
+ # def model_inference(input_dict, history):
+ #     text = input_dict["text"]
+ #     files = input_dict["files"]
+
+ #     # Load images if provided
+ #     if len(files) > 1:
+ #         images = [load_image(image) for image in files]
+ #     elif len(files) == 1:
+ #         images = [load_image(files[0])]
+ #     else:
+ #         images = []
+
+ #     # Validate input
+ #     if text == "" and not images:
+ #         gr.Error("Please input a query and optionally image(s).")
+ #         return
+ #     if text == "" and images:
+ #         gr.Error("Please input a text query along with the image(s).")
+ #         return
+
+ #     # Prepare messages for the model
+ #     messages = [
+ #         {
+ #             "role": "user",
+ #             "content": [
+ #                 *[{"type": "image", "image": image} for image in images],
+ #                 {"type": "text", "text": text},
+ #             ],
+ #         }
+ #     ]
+
+ #     # Apply chat template and process inputs
+ #     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+ #     inputs = processor(
+ #         text=[prompt],
+ #         images=images if images else None,
+ #         return_tensors="pt",
+ #         padding=True,
+ #     ).to("cuda")
+
+ #     # Set up streamer for real-time output
+ #     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+ #     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+
+ #     # Start generation in a separate thread
+ #     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+ #     thread.start()
+
+ #     # Stream the output
+ #     buffer = ""
+ #     yield "Thinking..."
+ #     for new_text in streamer:
+ #         buffer += new_text
+ #         time.sleep(0.01)
+ #         yield buffer
+
+ # examples = [
+ #     [{"text": "Hint: Please answer the question and provide the final answer at the end. Question: Which number do you have to write in the last daisy?", "files": ["5.jpg"]}]
+ # ]
+
+ # demo = gr.ChatInterface(
+ #     fn=model_inference,
+ #     description="# **🦖 Fancy-MLLM/R1-OneVision-7B**",
+ #     examples=examples,
+ #     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
+ #     stop_btn="Stop Generation",
+ #     multimodal=True,
+ #     cache_examples=False,
+ # )
+
+ # demo.launch(debug=True)
+
  import gradio as gr
  from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
  from transformers.image_utils import load_image
  from threading import Thread
  import time
  import torch
- import spaces
 
- MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
+ # Load the model and processor
+ MODEL_ID = "Fancy-MLLM/R1-OneVision-7B"
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
      MODEL_ID,
@@ -124,12 +213,11 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
      torch_dtype=torch.bfloat16
  ).to("cuda").eval()
 
- @spaces.GPU(duration=200)
  def model_inference(input_dict, history):
      text = input_dict["text"]
      files = input_dict["files"]
 
-     # Load images if provided
+     # Load images (if provided)
      if len(files) > 1:
          images = [load_image(image) for image in files]
      elif len(files) == 1:
@@ -137,15 +225,13 @@ def model_inference(input_dict, history):
      else:
          images = []
 
-     # Validate input
+     # Validate the input
      if text == "" and not images:
-         gr.Error("Please input a query and optionally image(s).")
-         return
+         return gr.Error("Please input a query and optionally image(s).")
      if text == "" and images:
-         gr.Error("Please input a text query along with the image(s).")
-         return
+         return gr.Error("Please input a text query along with the image(s).")
 
-     # Prepare messages for the model
+     # Prepare the input messages
      messages = [
          {
              "role": "user",
@@ -156,7 +242,7 @@
          }
      ]
 
-     # Apply chat template and process inputs
+     # Prepare the inputs with the processor
      prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
      inputs = processor(
          text=[prompt],
@@ -165,33 +251,44 @@
          padding=True,
      ).to("cuda")
 
-     # Set up streamer for real-time output
+     # Cap the number of output tokens to control inference time
+     max_new_tokens = 1024  # adjust as needed
+
+     # Create the streaming output
      streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+
+     # Run inference in a background thread
+     def run_inference():
+         model.generate(**generation_kwargs)
 
-     # Start generation in a separate thread
-     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+     thread = Thread(target=run_inference)
      thread.start()
 
-     # Stream the output
+     # Update the UI while generation runs
      buffer = ""
-     yield "Thinking..."
+     yield "Processing your request, please wait..."
      for new_text in streamer:
          buffer += new_text
-         time.sleep(0.01)
+         time.sleep(0.01)  # give the UI time to update smoothly
          yield buffer
 
+ # Example inputs
  examples = [
      [{"text": "Hint: Please answer the question and provide the final answer at the end. Question: Which number do you have to write in the last daisy?", "files": ["5.jpg"]}]
  ]
 
- demo = gr.ChatInterface(
+ # Build the Gradio interface
+ demo = gr.Interface(
      fn=model_inference,
      description="# **🦖 Fancy-MLLM/R1-OneVision-7B**",
      examples=examples,
-     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
-     stop_btn="Stop Generation",
-     multimodal=True,
+     inputs=gr.Chatbox(),
+     outputs=gr.Textbox(),
+     live=True,
+     allow_flagging="never",
+     layout="vertical",
+     title="Multimodal Inference with Fancy-MLLM",
      cache_examples=False,
  )
 
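The new model_inference keeps the thread-plus-streamer pattern: model.generate runs in a background thread while a TextIteratorStreamer yields decoded chunks that the UI accumulates. A minimal standalone sketch of that pattern, assuming a small text-only placeholder checkpoint (Qwen/Qwen2.5-0.5B-Instruct, chosen only for illustration rather than the Space's own model), looks like this:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder checkpoint for illustration; any causal LM works the same way.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Explain streaming generation in one sentence.", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until completion, so it runs in a worker thread while the
# main thread consumes decoded chunks from the streamer as they arrive.
thread = Thread(target=model.generate, kwargs=dict(inputs, streamer=streamer, max_new_tokens=64))
thread.start()

buffer = ""
for new_text in streamer:
    buffer += new_text  # same accumulation the Gradio callback yields
    print(new_text, end="", flush=True)
thread.join()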