Spaces:

Fancy-MLLM
/

R1-Onevision

Running on Zero

App Files Community

Fancy-MLLM committed on Feb 12

Commit

6a423bd

verified ·

1 Parent(s): f7ae658

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -88

app.py CHANGED Viewed

@@ -5,10 +5,14 @@ from qwen_vl_utils import process_vision_info
 import torch
 import time
 local_path = "Fancy-MLLM/R1-OneVision-7B"
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    local_path, torch_dtype="auto", device_map="cpu"
 )
 processor = AutoProcessor.from_pretrained(local_path)
@@ -26,8 +30,6 @@ def generate_output(image, text, button_click):
     # Prepare inputs for the model
     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # print(text_input)
-    # import pdb; pdb.set_trace()
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
         text=[text_input],
@@ -36,6 +38,8 @@ def generate_output(image, text, button_click):
         padding=True,
         return_tensors="pt",
     )
     inputs = inputs.to(model.device)
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
@@ -48,6 +52,7 @@ def generate_output(image, text, button_click):
         temperature=0.01,
         repetition_penalty=1.0,
     )
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
     generated_text = ''
@@ -56,8 +61,6 @@ def generate_output(image, text, button_click):
         for new_text in streamer:
             generated_text += new_text
             yield f"‎{generated_text}"
-            # print(f"Current text: {generated_text}")  # 调试输出
-            # yield generated_text  # 直接输出原始文本
     except Exception as e:
         print(f"Error: {e}")
         yield f"Error occurred: {str(e)}"
@@ -68,7 +71,6 @@ Css = """
     white-space: pre-wrap;
     word-wrap: break-word;
 }
 #output-markdown .math {
     overflow-x: auto;
     max-width: 100%;
@@ -87,7 +89,6 @@ Css = """
 #qwen-md .katex-display>.katex>.katex-html { display: inline; }
 """
 with gr.Blocks(css=Css) as demo:
     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
@@ -105,84 +106,3 @@ with gr.Blocks(css=Css) as demo:
     submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
 demo.launch(share=True)
-# Css = """
-# #output-markdown {
-#     overflow-y: auto;
-#     white-space: pre-wrap;
-#     word-wrap: break-word;
-# }
-# #output-markdown .math {
-#     overflow-x: auto;
-#     max-width: 100%;
-# }
-# .markdown-text {
-#     white-space: pre-wrap;
-#     word-wrap: break-word;
-# }
-# #qwen-md .katex-display { display: inline; }
-# #qwen-md .katex-display>.katex { display: inline; }
-# #qwen-md .katex-display>.katex>.katex-html { display: inline; }
-# """
-# # UI 组件
-# with gr.Blocks(css=Css) as demo:
-#     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
-#     with gr.Row():
-#         with gr.Column():
-#             input_image = gr.Image(type="pil", label="Upload")
-#             input_text = gr.Textbox(label="input your question")
-#             with gr.Row():
-#                 with gr.Column():
-#                     clear_btn = gr.ClearButton([input_image, input_text])
-#                 with gr.Column():
-#                     submit_btn = gr.Button("Submit", variant="primary")
-#         with gr.Column():
-#             output_text = gr.Markdown(
-#                 label="Generated Response",
-#                 max_height="80vh",
-#                 min_height="50vh",
-#                 container=True,
-#                 latex_delimiters=[{
-#                                         "left": "\\(",
-#                                         "right": "\\)",
-#                                         "display": True
-#                                     }, {
-#                                         "left": "\\begin\{equation\}",
-#                                         "right": "\\end\{equation\}",
-#                                         "display": True
-#                                     }, {
-#                                         "left": "\\begin\{align\}",
-#                                         "right": "\\end\{align\}",
-#                                         "display": True
-#                                     }, {
-#                                         "left": "\\begin\{alignat\}",
-#                                         "right": "\\end\{alignat\}",
-#                                         "display": True
-#                                     }, {
-#                                         "left": "\\begin\{gather\}",
-#                                         "right": "\\end\{gather\}",
-#                                         "display": True
-#                                     }, {
-#                                         "left": "\\begin\{CD\}",
-#                                         "right": "\\end\{CD\}",
-#                                         "display": True
-#                                     }, {
-#                                         "left": "\\[",
-#                                         "right": "\\]",
-#                                         "display": True
-#                                     }],
-#                 elem_id="qwen-md")
-#     submit_btn.click(
-#         fn=generate_output,
-#         inputs=[input_image, input_text],
-#         outputs=output_text,
-#         queue=True
-#     )
-# demo.launch(share=True)

 import torch
 import time
+# Check if a GPU is available
+device = "cuda" if torch.cuda.is_available() else "cpu"
 local_path = "Fancy-MLLM/R1-OneVision-7B"
+# Load the model on the appropriate device (GPU if available, otherwise CPU)
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    local_path, torch_dtype="auto", device_map=device
 )
 processor = AutoProcessor.from_pretrained(local_path)
     # Prepare inputs for the model
     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
         text=[text_input],
         padding=True,
         return_tensors="pt",
     )
+    # Move inputs to the same device as the model
     inputs = inputs.to(model.device)
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
         temperature=0.01,
         repetition_penalty=1.0,
     )
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
     generated_text = ''
         for new_text in streamer:
             generated_text += new_text
             yield f"‎{generated_text}"
     except Exception as e:
         print(f"Error: {e}")
         yield f"Error occurred: {str(e)}"
     white-space: pre-wrap;
     word-wrap: break-word;
 }
 #output-markdown .math {
     overflow-x: auto;
     max-width: 100%;
 #qwen-md .katex-display>.katex>.katex-html { display: inline; }
 """
 with gr.Blocks(css=Css) as demo:
     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
     submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
 demo.launch(share=True)