Spaces:

Fancy-MLLM
/

R1-Onevision

Running on Zero

App Files Files Community

Fancy-MLLM committed on Feb 11

Commit

2ba0a0c

verified ·

1 Parent(s): 4b1b73d

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -28

app.py CHANGED Viewed

@@ -1,24 +1,19 @@
 import gradio as gr
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
-from PIL import Image
-# 指定模型路径
 local_path = "Fancy-MLLM/R1-OneVision-7B"
-# 加载模型和处理器
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     local_path, torch_dtype="auto", device_map="cpu"
 )
 processor = AutoProcessor.from_pretrained(local_path)
-# 处理输入并生成输出
-def generate_output(image, text):
-    if image is None:
-        return "Error: No image uploaded!"
-    # 处理输入数据
     messages = [
         {
             "role": "user",
@@ -28,11 +23,12 @@ def generate_output(image, text):
             ],
         }
     ]
-    # 生成模型输入
     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
         text=[text_input],
         images=image_inputs,
@@ -40,38 +36,109 @@ def generate_output(image, text):
         padding=True,
         return_tensors="pt",
     )
-    inputs = inputs.to(model.device)  # 适配 CPU/GPU
-    # **同步执行**，避免线程问题
-    output_tokens = model.generate(
         **inputs,
         max_new_tokens=4096,
         top_p=0.001,
         top_k=1,
         temperature=0.01,
         repetition_penalty=1.0,
     )
-    # 解析输出
-    generated_text = processor.batch_decode(output_tokens, skip_special_tokens=True)[0]
-    return generated_text  # 直接返回结果
 # UI 组件
-with gr.Blocks() as demo:
     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="pil", label="Upload")  # **改回 PIL 处理**
-            input_text = gr.Textbox(label="Input your question")
             with gr.Row():
-                clear_btn = gr.ClearButton([input_image, input_text])
-                submit_btn = gr.Button("Submit", variant="primary")
         with gr.Column():
-            output_text = gr.Markdown(elem_id="qwen-md", container=True)
-    # 绑定事件，去掉 queue=True
-    submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
-demo.launch(share=True)

 import gradio as gr
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
+from threading import Thread
 from qwen_vl_utils import process_vision_info
 import torch
+import time
+# Model repository id; the same id is used to load both the model and the processor.
 local_path = "Fancy-MLLM/R1-OneVision-7B"
+# torch_dtype="auto" leaves the dtype choice to transformers; device_map="cpu"
+# requests CPU placement at load time.
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     local_path, torch_dtype="auto", device_map="cpu"
 )
+# Processor that builds the chat prompt and the model inputs in generate_output.
 processor = AutoProcessor.from_pretrained(local_path)
+def generate_output(image, text, button_click=None):
+    """Stream the model's answer for one image/question pair.
+
+    Generator used as the Gradio click handler: ``model.generate`` runs in a
+    background thread while this function yields the text accumulated so far
+    each time the streamer delivers a new chunk.
+
+    Args:
+        image: Image value supplied by the caller (the UI passes the upload
+            component's value).
+        text: Question supplied by the caller (the UI passes the textbox value).
+        button_click: Unused. Defaults to ``None`` because the click handler
+            supplies only two inputs (image and text); without a default the
+            call would fail with a missing-argument ``TypeError``.
+
+    Yields:
+        The response generated so far, or an error message if iterating the
+        streamer raises.
+    """
+    # Prepare input data
     messages = [
         {
             "role": "user",
             ],
         }
     ]
+    # Prepare inputs for the model
     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
         text=[text_input],
         images=image_inputs,
         padding=True,
         return_tensors="pt",
     )
+    # Move the prepared tensors to the device the model was loaded on.
+    inputs = inputs.to(model.device)
+    # skip_prompt=True: only newly generated text is streamed, not the prompt.
+    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = dict(
         **inputs,
+        streamer=streamer,
         max_new_tokens=4096,
         top_p=0.001,
         top_k=1,
         temperature=0.01,
         repetition_penalty=1.0,
     )
+    # Generate in a worker thread so this generator can consume the streamer
+    # and yield partial output while generation is still running.
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    generated_text = ''
+    try:
+        for new_text in streamer:
+            generated_text += new_text
+            # NOTE: the yielded literal starts with an invisible character ahead
+            # of the text; it is kept exactly as committed. Yielding
+            # generated_text on its own would emit the raw text instead.
+            yield f"‎{generated_text}"
+    except Exception as e:
+        # Report the failure in the output pane instead of ending silently.
+        print(f"Error: {e}")
+        yield f"Error occurred: {str(e)}"
+# Custom stylesheet handed to gr.Blocks(css=...).
+# The #qwen-md rules force KaTeX display-math blocks to render inline.
+# NOTE(review): the layout shown here only assigns elem_id "qwen-md"; confirm
+# whether the #output-markdown and .markdown-text selectors match any element.
+Css = """
+#output-markdown {
+    overflow-y: auto;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+}
+#output-markdown .math {
+    overflow-x: auto;
+    max-width: 100%;
+}
+.markdown-text {
+    white-space: pre-wrap;
+    word-wrap: break-word;
+}
+#qwen-md .katex-display { display: inline; }
+#qwen-md .katex-display>.katex { display: inline; }
+#qwen-md .katex-display>.katex>.katex-html { display: inline; }
+"""
 # UI components: inputs (image + question) on the left, streamed Markdown answer on the right.
+with gr.Blocks(css=Css) as demo:
     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
     with gr.Row():
         with gr.Column():
+            # No trailing comma here: a comma would turn the component into a
+            # 1-tuple and force every user to unpack it with `*input_image`.
+            input_image = gr.Image(type="pil", label="Upload")
+            input_text = gr.Textbox(label="input your question")
             with gr.Row():
+                with gr.Column():
+                    clear_btn = gr.ClearButton([input_image, input_text])
+                with gr.Column():
+                    submit_btn = gr.Button("Submit", variant="primary")
         with gr.Column():
+            # Delimiters are plain strings, so braces must not be
+            # backslash-escaped: "\{" is an invalid Python escape that keeps the
+            # backslash and yields "\begin\{equation\}" instead of
+            # "\begin{equation}".
+            output_text = gr.Markdown(
+                label="Generated Response",
+                max_height="80vh",
+                min_height="50vh",
+                container=True,
+                latex_delimiters=[
+                    {"left": "\\(", "right": "\\)", "display": True},
+                    {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
+                    {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
+                    {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
+                    {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
+                    {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
+                    {"left": "\\[", "right": "\\]", "display": True},
+                ],
+                elem_id="qwen-md")
+    # Stream generate_output's yielded text into the Markdown pane.
+    submit_btn.click(
+        fn=generate_output,
+        inputs=[input_image, input_text],
+        outputs=output_text,
+        queue=True
+    )
+demo.launch(share=True)