Fancy-MLLM committed
Commit 4c6ecb5 · verified · 1 Parent(s): 7e3ed30

Update app.py

Files changed (1)
  1. app.py +119 -22
app.py CHANGED
@@ -108,15 +108,104 @@
  # demo.launch(share=False)
 
 
+ # import gradio as gr
+ # from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
+ # from transformers.image_utils import load_image
+ # from threading import Thread
+ # import time
+ # import torch
+ # import spaces
+
+ # MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
+ # processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+ # model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+ #     MODEL_ID,
+ #     trust_remote_code=True,
+ #     torch_dtype=torch.bfloat16
+ # ).to("cuda").eval()
+
+ # @spaces.GPU(duration=200)
+ # def model_inference(input_dict, history):
+ #     text = input_dict["text"]
+ #     files = input_dict["files"]
+
+ #     # Load images if provided
+ #     if len(files) > 1:
+ #         images = [load_image(image) for image in files]
+ #     elif len(files) == 1:
+ #         images = [load_image(files[0])]
+ #     else:
+ #         images = []
+
+ #     # Validate input
+ #     if text == "" and not images:
+ #         gr.Error("Please input a query and optionally image(s).")
+ #         return
+ #     if text == "" and images:
+ #         gr.Error("Please input a text query along with the image(s).")
+ #         return
+
+ #     # Prepare messages for the model
+ #     messages = [
+ #         {
+ #             "role": "user",
+ #             "content": [
+ #                 *[{"type": "image", "image": image} for image in images],
+ #                 {"type": "text", "text": text},
+ #             ],
+ #         }
+ #     ]
+
+ #     # Apply chat template and process inputs
+ #     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+ #     inputs = processor(
+ #         text=[prompt],
+ #         images=images if images else None,
+ #         return_tensors="pt",
+ #         padding=True,
+ #     ).to("cuda")
+
+ #     # Set up streamer for real-time output
+ #     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+ #     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+
+ #     # Start generation in a separate thread
+ #     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+ #     thread.start()
+
+ #     # Stream the output
+ #     buffer = ""
+ #     yield "Thinking..."
+ #     for new_text in streamer:
+ #         buffer += new_text
+ #         time.sleep(0.01)
+ #         yield buffer
+
+ # examples = [
+ #     [{"text": "Hint: Please answer the question and provide the final answer at the end. Question: Which number do you have to write in the last daisy?", "files": ["5.jpg"]}]
+ # ]
+
+ # demo = gr.ChatInterface(
+ #     fn=model_inference,
+ #     description="# **🦖 Fancy-MLLM/R1-OneVision-7B**",
+ #     examples=examples,
+ #     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
+ #     stop_btn="Stop Generation",
+ #     multimodal=True,
+ #     cache_examples=False,
+ # )
+
+ # demo.launch(debug=True)
+
  import gradio as gr
  from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
  from transformers.image_utils import load_image
  from threading import Thread
  import time
  import torch
- import spaces
 
- MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
+ # Load the model and processor
+ MODEL_ID = "Fancy-MLLM/R1-OneVision-7B"
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
      MODEL_ID,
@@ -124,12 +213,11 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
      torch_dtype=torch.bfloat16
  ).to("cuda").eval()
 
- @spaces.GPU(duration=200)
  def model_inference(input_dict, history):
      text = input_dict["text"]
      files = input_dict["files"]
 
-     # Load images if provided
+     # Load images (if provided)
      if len(files) > 1:
          images = [load_image(image) for image in files]
      elif len(files) == 1:
@@ -137,15 +225,13 @@ def model_inference(input_dict, history):
      else:
          images = []
 
-     # Validate input
+     # Validate the input
      if text == "" and not images:
-         gr.Error("Please input a query and optionally image(s).")
-         return
+         return gr.Error("Please input a query and optionally image(s).")
      if text == "" and images:
-         gr.Error("Please input a text query along with the image(s).")
-         return
+         return gr.Error("Please input a text query along with the image(s).")
 
-     # Prepare messages for the model
+     # Prepare the input messages
      messages = [
          {
              "role": "user",
@@ -156,7 +242,7 @@
          }
      ]
 
-     # Apply chat template and process inputs
+     # Prepare the inputs with the processor
      prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
      inputs = processor(
          text=[prompt],
@@ -165,33 +251,44 @@
          padding=True,
      ).to("cuda")
 
-     # Set up streamer for real-time output
+     # Cap the number of output tokens to control inference time
+     max_new_tokens = 1024  # adjust as needed
+
+     # Create the streaming output
      streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+
+     # Run inference in a background thread
+     def run_inference():
+         model.generate(**generation_kwargs)
 
-     # Start generation in a separate thread
-     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+     thread = Thread(target=run_inference)
      thread.start()
 
-     # Stream the output
+     # Update the UI while generation runs
      buffer = ""
-     yield "Thinking..."
+     yield "Processing your request, please wait..."
      for new_text in streamer:
          buffer += new_text
-         time.sleep(0.01)
+         time.sleep(0.01)  # give the UI time to update smoothly
          yield buffer
 
+ # Example inputs
  examples = [
      [{"text": "Hint: Please answer the question and provide the final answer at the end. Question: Which number do you have to write in the last daisy?", "files": ["5.jpg"]}]
  ]
 
- demo = gr.ChatInterface(
+ # Build the Gradio interface
+ demo = gr.Interface(
      fn=model_inference,
      description="# **🦖 Fancy-MLLM/R1-OneVision-7B**",
      examples=examples,
-     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
-     stop_btn="Stop Generation",
-     multimodal=True,
+     inputs=gr.Chatbox(),
+     outputs=gr.Textbox(),
+     live=True,
+     allow_flagging="never",
+     layout="vertical",
+     title="Multimodal Inference with Fancy-MLLM",
      cache_examples=False,
  )
 
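The new model_inference keeps the thread-plus-streamer pattern: model.generate runs in a background thread while a TextIteratorStreamer yields decoded chunks that the UI accumulates. A minimal standalone sketch of that pattern, assuming a small text-only placeholder checkpoint (Qwen/Qwen2.5-0.5B-Instruct, chosen only for illustration rather than the Space's own model), looks like this:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder checkpoint for illustration; any causal LM works the same way.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Explain streaming generation in one sentence.", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until completion, so it runs in a worker thread while the
# main thread consumes decoded chunks from the streamer as they arrive.
thread = Thread(target=model.generate, kwargs=dict(inputs, streamer=streamer, max_new_tokens=64))
thread.start()

buffer = ""
for new_text in streamer:
    buffer += new_text  # same accumulation the Gradio callback yields
    print(new_text, end="", flush=True)
thread.join()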