Fancy-MLLM committed
Commit 4b1b73d · verified · 1 parent: e3288b1

Update app.py

Files changed (1): app.py +22 -56
app.py CHANGED
@@ -1,25 +1,24 @@
 import gradio as gr
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
-from threading import Thread
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
+from PIL import Image
 
-# Specify the local cache path for models
+# Specify the model path
 local_path = "Fancy-MLLM/R1-OneVision-7B"
 
-# Load model and processor
+# Load the model and processor
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     local_path, torch_dtype="auto", device_map="cpu"
 )
 processor = AutoProcessor.from_pretrained(local_path)
 
-# Function to process image and text and generate the output
-def generate_output(image_path, text):
-    # Load image from file path
-    from PIL import Image
-    image = Image.open(image_path).convert("RGB")
+# Process the inputs and generate the output
+def generate_output(image, text):
+    if image is None:
+        return "Error: No image uploaded!"
 
-    # Prepare input data
+    # Prepare the input data
     messages = [
         {
             "role": "user",
@@ -29,8 +28,8 @@ def generate_output(image_path, text):
             ],
         }
     ]
-
-    # Prepare inputs for the model
+
+    # Build the model inputs
     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
 
@@ -41,12 +40,11 @@ def generate_output(image_path, text):
         padding=True,
         return_tensors="pt",
     )
-    inputs = inputs.to(model.device)  # Make sure the devices match
+    inputs = inputs.to(model.device)  # Works on CPU or GPU
 
-    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = dict(
+    # Generate synchronously to avoid threading issues
+    output_tokens = model.generate(
         **inputs,
-        streamer=streamer,
         max_new_tokens=4096,
         top_p=0.001,
         top_k=1,
@@ -54,46 +52,18 @@ def generate_output(image_path, text):
         repetition_penalty=1.0,
     )
 
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-
-    generated_text = ''
-    try:
-        for new_text in streamer:
-            generated_text += new_text
-            yield f"{generated_text}"
-    except Exception as e:
-        yield f"Error occurred: {str(e)}"
+    # Decode the output
+    generated_text = processor.batch_decode(output_tokens, skip_special_tokens=True)[0]
+    return generated_text  # Return the result directly
 
-# CSS for UI styling
-Css = """
-#output-markdown {
-    overflow-y: auto;
-    white-space: pre-wrap;
-    word-wrap: break-word;
-}
-#output-markdown .math {
-    overflow-x: auto;
-    max-width: 100%;
-}
-.markdown-text {
-    white-space: pre-wrap;
-    word-wrap: break-word;
-}
-#qwen-md .katex-display { display: inline; }
-#qwen-md .katex-display>.katex { display: inline; }
-#qwen-md .katex-display>.katex>.katex-html { display: inline; }
-"""
-
-# Gradio UI
-with gr.Blocks(css=Css) as demo:
+# UI components
+with gr.Blocks() as demo:
     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
 
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="filepath", label="Upload")  # Key change: filepath avoids redrawing the UI
+            input_image = gr.Image(type="pil", label="Upload")  # Switched back to PIL handling
            input_text = gr.Textbox(label="Input your question")
-
         with gr.Row():
             clear_btn = gr.ClearButton([input_image, input_text])
             submit_btn = gr.Button("Submit", variant="primary")
@@ -101,11 +71,7 @@ with gr.Blocks(css=Css) as demo:
         with gr.Column():
             output_text = gr.Markdown(elem_id="qwen-md", container=True)
 
-    submit_btn.click(
-        fn=generate_output,
-        inputs=[input_image, input_text],
-        outputs=output_text,
-        queue=True
-    )
+    # Bind the click event; queue=True dropped
+    submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
 
 demo.launch(share=True)
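One caveat with the new synchronous path: processor.batch_decode(output_tokens, ...) decodes the full sequence, prompt included, so the question text is echoed back in the rendered answer. A minimal sketch of the usual trimming step (mirroring the pattern from the Qwen2.5-VL model card; inputs and output_tokens as defined in generate_output above):

# Drop the prompt tokens so only the newly generated answer is decoded
trimmed = [
    out_ids[len(in_ids):]
    for in_ids, out_ids in zip(inputs.input_ids, output_tokens)
]
generated_text = processor.batch_decode(trimmed, skip_special_tokens=True)[0]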
 
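If token-by-token streaming is wanted back without the threading problems this commit removes, the streamer can wrap the underlying tokenizer (processor.tokenizer) rather than the whole processor, and the handler can stay a plain Gradio generator. A rough sketch under those assumptions, not part of this commit; the message content block is elided in the diff above, so its layout here follows standard Qwen2.5-VL usage:

from threading import Thread
from transformers import TextIteratorStreamer

def generate_output_streaming(image, text):
    # Assumed content layout (image + text), per standard Qwen2.5-VL usage
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": text},
        ],
    }]
    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text_input], images=image_inputs, videos=video_inputs,
        padding=True, return_tensors="pt",
    ).to(model.device)

    # Wrap the tokenizer (not the processor) so chunks are decoded as they arrive
    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=4096))
    thread.start()

    generated_text = ""
    for new_text in streamer:  # blocks until the next decoded chunk is ready
        generated_text += new_text
        yield generated_text
    thread.join()

Bound via submit_btn.click exactly like generate_output; Gradio streams the yielded strings into the Markdown component, since queuing (the default in Gradio 4) handles generator functions.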