Fancy-MLLM committed on
Commit 53c6808 · verified · 1 Parent(s): 4c6ecb5

Update app.py

Files changed (1)
  1. app.py +50 -125
app.py CHANGED
@@ -108,104 +108,15 @@
  # demo.launch(share=False)
 
 
- # import gradio as gr
- # from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
- # from transformers.image_utils import load_image
- # from threading import Thread
- # import time
- # import torch
- # import spaces
-
- # MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
- # processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
- # model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
- #     MODEL_ID,
- #     trust_remote_code=True,
- #     torch_dtype=torch.bfloat16
- # ).to("cuda").eval()
-
- # @spaces.GPU(duration=200)
- # def model_inference(input_dict, history):
- #     text = input_dict["text"]
- #     files = input_dict["files"]
-
- #     # Load images if provided
- #     if len(files) > 1:
- #         images = [load_image(image) for image in files]
- #     elif len(files) == 1:
- #         images = [load_image(files[0])]
- #     else:
- #         images = []
-
- #     # Validate input
- #     if text == "" and not images:
- #         gr.Error("Please input a query and optionally image(s).")
- #         return
- #     if text == "" and images:
- #         gr.Error("Please input a text query along with the image(s).")
- #         return
-
- #     # Prepare messages for the model
- #     messages = [
- #         {
- #             "role": "user",
- #             "content": [
- #                 *[{"type": "image", "image": image} for image in images],
- #                 {"type": "text", "text": text},
- #             ],
- #         }
- #     ]
-
- #     # Apply chat template and process inputs
- #     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
- #     inputs = processor(
- #         text=[prompt],
- #         images=images if images else None,
- #         return_tensors="pt",
- #         padding=True,
- #     ).to("cuda")
-
- #     # Set up streamer for real-time output
- #     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
- #     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
-
- #     # Start generation in a separate thread
- #     thread = Thread(target=model.generate, kwargs=generation_kwargs)
- #     thread.start()
-
- #     # Stream the output
- #     buffer = ""
- #     yield "Thinking..."
- #     for new_text in streamer:
- #         buffer += new_text
- #         time.sleep(0.01)
- #         yield buffer
-
- # examples = [
- #     [{"text": "Hint: Please answer the question and provide the final answer at the end. Question: Which number do you have to write in the last daisy?", "files": ["5.jpg"]}]
- # ]
-
- # demo = gr.ChatInterface(
- #     fn=model_inference,
- #     description="# **🦖 Fancy-MLLM/R1-OneVision-7B**",
- #     examples=examples,
- #     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
- #     stop_btn="Stop Generation",
- #     multimodal=True,
- #     cache_examples=False,
- # )
-
- # demo.launch(debug=True)
-
  import gradio as gr
  from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
  from transformers.image_utils import load_image
  from threading import Thread
  import time
  import torch
+ import spaces
 
- # 加载模型和处理器
- MODEL_ID = "Fancy-MLLM/R1-OneVision-7B"
+ MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
      MODEL_ID,
@@ -213,11 +124,12 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
      torch_dtype=torch.bfloat16
  ).to("cuda").eval()
 
+ @spaces.GPU(duration=200)
  def model_inference(input_dict, history):
      text = input_dict["text"]
      files = input_dict["files"]
 
-     # 加载图片(如果提供)
+     # Load images if provided
      if len(files) > 1:
          images = [load_image(image) for image in files]
      elif len(files) == 1:
@@ -225,13 +137,15 @@ def model_inference(input_dict, history):
      else:
          images = []
 
-     # 输入验证
+     # Validate input
      if text == "" and not images:
-         return gr.Error("Please input a query and optionally image(s).")
+         gr.Error("Please input a query and optionally image(s).")
+         return
      if text == "" and images:
-         return gr.Error("Please input a text query along with the image(s).")
+         gr.Error("Please input a text query along with the image(s).")
+         return
 
-     # 准备输入消息
+     # Prepare messages for the model
      messages = [
          {
              "role": "user",
@@ -242,7 +156,7 @@ def model_inference(input_dict, history):
          }
      ]
 
-     # 使用处理器准备输入
+     # Apply chat template and process inputs
      prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
      inputs = processor(
          text=[prompt],
@@ -251,44 +165,55 @@ def model_inference(input_dict, history):
          padding=True,
      ).to("cuda")
 
-     # 设置最大输出token数以控制推理时间
-     max_new_tokens = 1024 # 可以根据实际需要调整
-
-     # 创建流式输出
+     # # Set up streamer for real-time output
+     # streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+     # generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+
+     # # Start generation in a separate thread
+     # thread = Thread(target=model.generate, kwargs=generation_kwargs)
+     # thread.start()
+
+     # # Stream the output
+     # buffer = ""
+     # yield "Thinking..."
+     # for new_text in streamer:
+     #     buffer += new_text
+     #     time.sleep(0.01)
+     #     yield buffer
      streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
-
-     # 使用后台线程执行推理
-     def run_inference():
-         model.generate(**generation_kwargs)
-
-     thread = Thread(target=run_inference)
+     generation_kwargs = dict(
+         **inputs,
+         streamer=streamer,
+         max_new_tokens=2048,
+         top_p=0.001,
+         top_k=1,
+         temperature=0.01,
+         repetition_penalty=1.0,
+     )
+
+     thread = Thread(target=model.generate, kwargs=generation_kwargs)
      thread.start()
+     generated_text = ''
+
+     try:
+         for new_text in streamer:
+             generated_text += new_text
+             yield generated_text
+     except Exception as e:
+         print(f"Error: {e}")
+         yield f"Error occurred: {str(e)}"
 
-     # 生成过程中更新UI
-     buffer = ""
-     yield "Processing your request, please wait..."
-     for new_text in streamer:
-         buffer += new_text
-         time.sleep(0.01) # 给UI流畅更新的时间
-         yield buffer
-
- # 示例输入
  examples = [
      [{"text": "Hint: Please answer the question and provide the final answer at the end. Question: Which number do you have to write in the last daisy?", "files": ["5.jpg"]}]
  ]
 
- # 创建Gradio界面
- demo = gr.Interface(
+ demo = gr.ChatInterface(
      fn=model_inference,
      description="# **🦖 Fancy-MLLM/R1-OneVision-7B**",
      examples=examples,
-     inputs=gr.Chatbox(),
-     outputs=gr.Textbox(),
-     live=True,
-     allow_flagging="never",
-     layout="vertical",
-     title="Multimodal Inference with Fancy-MLLM",
+     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
+     stop_btn="Stop Generation",
+     multimodal=True,
      cache_examples=False,
  )
 
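
The updated handler streams tokens by running model.generate on a background thread and reading partial text from a TextIteratorStreamer. The snippet below is a minimal, self-contained sketch of that pattern only; it uses a small text-only placeholder checkpoint (Qwen/Qwen2.5-0.5B-Instruct) rather than this Space's multimodal model, and it omits the image processor and gr.ChatInterface wiring from app.py.

from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder checkpoint for illustration only (not the Space's model).
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

def stream_reply(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_prompt drops the echoed input; skip_special_tokens hides EOS markers.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=128)
    # generate() blocks until finished, so it runs on a background thread
    # while this generator reads partial text from the streamer.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text  # yield the growing string, as a Gradio chat callback expects

for partial in stream_reply("Say hello in one short sentence."):
    print(partial)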