Fancy-MLLM committed
Commit 4b1b73d · verified · 1 parent: e3288b1

Update app.py

Files changed (1): app.py +22 -56
app.py CHANGED
@@ -1,25 +1,24 @@
 import gradio as gr
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
-from threading import Thread
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
+from PIL import Image
 
-# Specify the local cache path for models
+# Specify the model path
 local_path = "Fancy-MLLM/R1-OneVision-7B"
 
-# Load model and processor
+# Load the model and processor
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     local_path, torch_dtype="auto", device_map="cpu"
 )
 processor = AutoProcessor.from_pretrained(local_path)
 
-# Function to process image and text and generate the output
-def generate_output(image_path, text):
-    # Load image from file path
-    from PIL import Image
-    image = Image.open(image_path).convert("RGB")
+# Process the inputs and generate the output
+def generate_output(image, text):
+    if image is None:
+        return "Error: No image uploaded!"
 
-    # Prepare input data
+    # Prepare the input data
     messages = [
         {
             "role": "user",
@@ -29,8 +28,8 @@ def generate_output(image_path, text):
             ],
         }
     ]
-
-    # Prepare inputs for the model
+
+    # Build the model inputs
     text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
 
@@ -41,12 +40,11 @@ def generate_output(image_path, text):
         padding=True,
         return_tensors="pt",
     )
-    inputs = inputs.to(model.device)  # Make sure the devices match
+    inputs = inputs.to(model.device)  # Works on CPU or GPU
 
-    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = dict(
+    # Generate synchronously to avoid threading issues
+    output_tokens = model.generate(
         **inputs,
-        streamer=streamer,
         max_new_tokens=4096,
         top_p=0.001,
         top_k=1,
@@ -54,46 +52,18 @@ def generate_output(image_path, text):
         repetition_penalty=1.0,
     )
 
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-
-    generated_text = ''
-    try:
-        for new_text in streamer:
-            generated_text += new_text
-            yield f"{generated_text}"
-    except Exception as e:
-        yield f"Error occurred: {str(e)}"
+    # Decode the output
+    generated_text = processor.batch_decode(output_tokens, skip_special_tokens=True)[0]
+    return generated_text  # Return the result directly
 
-# CSS for UI styling
-Css = """
-#output-markdown {
-    overflow-y: auto;
-    white-space: pre-wrap;
-    word-wrap: break-word;
-}
-#output-markdown .math {
-    overflow-x: auto;
-    max-width: 100%;
-}
-.markdown-text {
-    white-space: pre-wrap;
-    word-wrap: break-word;
-}
-#qwen-md .katex-display { display: inline; }
-#qwen-md .katex-display>.katex { display: inline; }
-#qwen-md .katex-display>.katex>.katex-html { display: inline; }
-"""
-
-# Gradio UI
-with gr.Blocks(css=Css) as demo:
+# UI components
+with gr.Blocks() as demo:
     gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
 
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="filepath", label="Upload")  # Key change: filepath avoids redrawing the UI
+            input_image = gr.Image(type="pil", label="Upload")  # Switched back to PIL handling
            input_text = gr.Textbox(label="Input your question")
-
         with gr.Row():
             clear_btn = gr.ClearButton([input_image, input_text])
             submit_btn = gr.Button("Submit", variant="primary")
@@ -101,11 +71,7 @@ with gr.Blocks(css=Css) as demo:
         with gr.Column():
             output_text = gr.Markdown(elem_id="qwen-md", container=True)
 
-    submit_btn.click(
-        fn=generate_output,
-        inputs=[input_image, input_text],
-        outputs=output_text,
-        queue=True
-    )
+    # Bind the click event; queue=True dropped
+    submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
 
 demo.launch(share=True)
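One caveat with the new synchronous path: processor.batch_decode(output_tokens, ...) decodes the full sequence, prompt included, so the question text is echoed back in the rendered answer. A minimal sketch of the usual trimming step (mirroring the pattern from the Qwen2.5-VL model card; inputs and output_tokens as defined in generate_output above):

# Drop the prompt tokens so only the newly generated answer is decoded
trimmed = [
    out_ids[len(in_ids):]
    for in_ids, out_ids in zip(inputs.input_ids, output_tokens)
]
generated_text = processor.batch_decode(trimmed, skip_special_tokens=True)[0]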
 
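If token-by-token streaming is wanted back without the threading problems this commit removes, the streamer can wrap the underlying tokenizer (processor.tokenizer) rather than the whole processor, and the handler can stay a plain Gradio generator. A rough sketch under those assumptions, not part of this commit; the message content block is elided in the diff above, so its layout here follows standard Qwen2.5-VL usage:

from threading import Thread
from transformers import TextIteratorStreamer

def generate_output_streaming(image, text):
    # Assumed content layout (image + text), per standard Qwen2.5-VL usage
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": text},
        ],
    }]
    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text_input], images=image_inputs, videos=video_inputs,
        padding=True, return_tensors="pt",
    ).to(model.device)

    # Wrap the tokenizer (not the processor) so chunks are decoded as they arrive
    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=4096))
    thread.start()

    generated_text = ""
    for new_text in streamer:  # blocks until the next decoded chunk is ready
        generated_text += new_text
        yield generated_text
    thread.join()

Bound via submit_btn.click exactly like generate_output; Gradio streams the yielded strings into the Markdown component, since queuing (the default in Gradio 4) handles generator functions.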