# import gradio as gr
# from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
# from threading import Thread
# from qwen_vl_utils import process_vision_info
# import torch
# import time

# # Check if a GPU is available
# device = "cuda" if torch.cuda.is_available() else "cpu"
# local_path = "Fancy-MLLM/R1-OneVision-7B"

# # Load the model on the appropriate device (GPU if available, otherwise CPU)
# model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
#     local_path, torch_dtype="auto", device_map=device
# )
# processor = AutoProcessor.from_pretrained(local_path)
# def generate_output(image, text, button_click):
#     # Prepare input data
#     messages = [
#         {
#             "role": "user",
#             "content": [
#                 {"type": "image", "image": image, 'min_pixels': 1003520, 'max_pixels': 12845056},
#                 {"type": "text", "text": text},
#             ],
#         }
#     ]
#     # Prepare inputs for the model
#     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
#     image_inputs, video_inputs = process_vision_info(messages)
#     inputs = processor(
#         text=[text_input],
#         images=image_inputs,
#         videos=video_inputs,
#         padding=True,
#         return_tensors="pt",
#     )
#     # Move inputs to the same device as the model
#     inputs = inputs.to(model.device)
#     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
#     generation_kwargs = dict(
#         **inputs,
#         streamer=streamer,
#         max_new_tokens=4096,
#         top_p=0.001,
#         top_k=1,
#         temperature=0.01,
#         repetition_penalty=1.0,
#     )
#     thread = Thread(target=model.generate, kwargs=generation_kwargs)
#     thread.start()
#     generated_text = ''
#     try:
#         for new_text in streamer:
#             generated_text += new_text
#             yield generated_text
#     except Exception as e:
#         print(f"Error: {e}")
#         yield f"Error occurred: {str(e)}"
# Css = """
# #output-markdown {
#     overflow-y: auto;
#     white-space: pre-wrap;
#     word-wrap: break-word;
# }
# #output-markdown .math {
#     overflow-x: auto;
#     max-width: 100%;
# }
# .markdown-text {
#     white-space: pre-wrap;
#     word-wrap: break-word;
# }
# .markdown-output {
#     min-height: 20vh;
#     max-width: 100%;
#     overflow-y: auto;
# }
# #qwen-md .katex-display { display: inline; }
# #qwen-md .katex-display>.katex { display: inline; }
# #qwen-md .katex-display>.katex>.katex-html { display: inline; }
# """
# with gr.Blocks(css=Css) as demo:
#     gr.HTML("""<center><font size=8>R1-OneVision Demo</center>""")
#     with gr.Row():
#         with gr.Column():
#             input_image = gr.Image(type="pil", label="Upload")  # pass the upload to the model as a PIL image
#             input_text = gr.Textbox(label="Input your question")
#             with gr.Row():
#                 clear_btn = gr.ClearButton([input_image, input_text])
#                 submit_btn = gr.Button("Submit", variant="primary")
#         with gr.Column():
#             output_text = gr.Markdown(elem_id="qwen-md", container=True, elem_classes="markdown-output")
#     submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
# demo.launch(share=False)
import gradio as gr
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
from transformers.image_utils import load_image
from threading import Thread
import time
import torch
import spaces

MODEL_ID = "Fancy-MLLM/R1-OneVision-7B"

# Load the processor and model once at startup; weights are placed on the GPU in bfloat16.
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
).to("cuda").eval()
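
# Assumes this Space runs on ZeroGPU (the "Running on Zero" banner): on ZeroGPU the function
# that uses the GPU is typically wrapped with @spaces.GPU so a device is attached for the
# duration of each call (this also puts the `import spaces` above to use). Drop the decorator
# if the Space runs on dedicated GPU hardware instead.
@spaces.GPU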
def model_inference(input_dict, history):
    text = input_dict["text"]
    files = input_dict["files"]

    # Load images if provided (an empty file list simply yields no images)
    images = [load_image(path) for path in files]

    # Validate input: a text query is required, images are optional
    if text == "" and not images:
        raise gr.Error("Please input a text query (and optionally image(s)).")
    if text == "" and images:
        raise gr.Error("Please input a text query along with the image(s).")

    # Prepare messages for the model
    messages = [
        {
            "role": "user",
            "content": [
                *[{"type": "image", "image": image} for image in images],
                {"type": "text", "text": text},
            ],
        }
    ]

    # Apply chat template and process inputs
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(
        text=[prompt],
        images=images if images else None,
        return_tensors="pt",
        padding=True,
    ).to("cuda")

    # Set up streamer for real-time output
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)

    # Start generation in a separate thread so tokens can be streamed as they arrive
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Stream the output
    buffer = ""
    yield "Thinking..."
    for new_text in streamer:
        buffer += new_text
        time.sleep(0.01)
        yield buffer
demo = gr.ChatInterface(
    fn=model_inference,
    description="# **Fancy-MLLM/R1-OneVision-7B**",
    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
    stop_btn="Stop Generation",
    multimodal=True,
    cache_examples=False,
)

demo.launch(debug=True)
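
# Rough sketch for running this script outside of Spaces. The package list is an assumption,
# not taken from the repo's requirements.txt, and the filename app.py is just the usual
# Spaces entry-point convention:
#   pip install gradio transformers torch accelerate spaces
#   python app.py
# Note that the hard-coded .to("cuda") calls above assume a CUDA device is available.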