Fancy-MLLM committed
Commit 2ba0a0c · verified · 1 Parent(s): 4b1b73d

Update app.py

Files changed (1)
  1. app.py +95 -28
app.py CHANGED
@@ -1,24 +1,19 @@
  import gradio as gr
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
+ from threading import Thread
  from qwen_vl_utils import process_vision_info
  import torch
- from PIL import Image
+ import time

- # Path to the model
  local_path = "Fancy-MLLM/R1-OneVision-7B"

- # Load the model and processor
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
      local_path, torch_dtype="auto", device_map="cpu"
  )
  processor = AutoProcessor.from_pretrained(local_path)

- # Process the input and generate output
  def generate_output(image, text):
-     if image is None:
-         return "Error: No image uploaded!"
-
-     # Process the input data
+     # Prepare input data
      messages = [
          {
              "role": "user",
@@ -28,11 +23,12 @@ def generate_output(image, text):
              ],
          }
      ]
-
-     # Build the model inputs
+
+     # Prepare inputs for the model
      text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     # print(text_input)
+     # import pdb; pdb.set_trace()
      image_inputs, video_inputs = process_vision_info(messages)
-
      inputs = processor(
          text=[text_input],
          images=image_inputs,
@@ -40,38 +36,109 @@ def generate_output(image, text):
          padding=True,
          return_tensors="pt",
      )
-     inputs = inputs.to(model.device)  # works on CPU or GPU
+     inputs = inputs.to(model.device)

-     # Run generation synchronously to avoid threading issues
-     output_tokens = model.generate(
+     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+     generation_kwargs = dict(
          **inputs,
+         streamer=streamer,
          max_new_tokens=4096,
          top_p=0.001,
          top_k=1,
          temperature=0.01,
          repetition_penalty=1.0,
      )
+     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+     thread.start()
+     generated_text = ''

-     # Decode the output
-     generated_text = processor.batch_decode(output_tokens, skip_special_tokens=True)[0]
-     return generated_text  # return the result directly
+     try:
+         for new_text in streamer:
+             generated_text += new_text
+             yield f"\u200e{generated_text}"  # prefixed with an invisible left-to-right mark (U+200E)
+             # print(f"Current text: {generated_text}")  # debug output
+             # yield generated_text  # yield the raw text directly
+     except Exception as e:
+         print(f"Error: {e}")
+         yield f"Error occurred: {str(e)}"
+
+ Css = """
+ #output-markdown {
+     overflow-y: auto;
+     white-space: pre-wrap;
+     word-wrap: break-word;
+ }
+
+ #output-markdown .math {
+     overflow-x: auto;
+     max-width: 100%;
+ }
+ .markdown-text {
+     white-space: pre-wrap;
+     word-wrap: break-word;
+ }
+ #qwen-md .katex-display { display: inline; }
+ #qwen-md .katex-display>.katex { display: inline; }
+ #qwen-md .katex-display>.katex>.katex-html { display: inline; }
+ """

  # UI components
- with gr.Blocks() as demo:
+ with gr.Blocks(css=Css) as demo:
      gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")

      with gr.Row():
          with gr.Column():
-             input_image = gr.Image(type="pil", label="Upload")  # changed back to PIL handling
+             input_image = gr.Image(type="pil", label="Upload")
              input_text = gr.Textbox(label="Input your question")
              with gr.Row():
-                 clear_btn = gr.ClearButton([input_image, input_text])
-                 submit_btn = gr.Button("Submit", variant="primary")
+                 with gr.Column():
+                     clear_btn = gr.ClearButton([input_image, input_text])
+                 with gr.Column():
+                     submit_btn = gr.Button("Submit", variant="primary")

          with gr.Column():
-             output_text = gr.Markdown(elem_id="qwen-md", container=True)
+             output_text = gr.Markdown(
+                 label="Generated Response",
+                 max_height="80vh",
+                 min_height="50vh",
+                 container=True,
+                 latex_delimiters=[
+                     {"left": "\\(", "right": "\\)", "display": True},
+                     {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
+                     {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
+                     {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
+                     {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
+                     {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
+                     {"left": "\\[", "right": "\\]", "display": True},
+                 ],
+                 elem_id="qwen-md")

-     # Bind the event; queue=True removed
-     submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)
+     submit_btn.click(
+         fn=generate_output,
+         inputs=[input_image, input_text],
+         outputs=output_text,
+         queue=True
+     )

  demo.launch(share=True)
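The heart of this change is the standard transformers streaming pattern: model.generate blocks until generation finishes, so the new code runs it on a worker thread and lets the Gradio callback iterate a TextIteratorStreamer, yielding partial text as it arrives. Below is a minimal, self-contained sketch of the same pattern; "gpt2" is a small text-only stand-in chosen so the sketch runs quickly, not the Space's actual Qwen2.5-VL checkpoint.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# "gpt2" is a stand-in for illustration only; app.py loads
# Fancy-MLLM/R1-OneVision-7B via Qwen2_5_VLForConditionalGeneration.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Token streaming works by", return_tensors="pt")

# skip_prompt=True keeps the echoed prompt out of the stream;
# skip_special_tokens=True drops markers such as <|endoftext|>.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() runs on a worker thread; the main thread drains decoded text
# chunks from the streamer's internal queue as they become available.
thread = Thread(target=model.generate,
                kwargs=dict(**inputs, streamer=streamer, max_new_tokens=40))
thread.start()

for chunk in streamer:
    print(chunk, end="", flush=True)  # app.py yields the accumulated text here
thread.join()

Because generate_output is now a generator, Gradio streams each yield into the Markdown component; generator callbacks are also why the click handler passes queue=True.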
1
  import gradio as gr
2
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
3
+ from threading import Thread
4
  from qwen_vl_utils import process_vision_info
5
  import torch
6
+ import time
7
 
 
8
  local_path = "Fancy-MLLM/R1-OneVision-7B"
9
 
 
10
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
11
  local_path, torch_dtype="auto", device_map="cpu"
12
  )
13
  processor = AutoProcessor.from_pretrained(local_path)
14
 
15
+ def generate_output(image, text, button_click):
16
+ # Prepare input data
 
 
 
 
17
  messages = [
18
  {
19
  "role": "user",
 
23
  ],
24
  }
25
  ]
26
+
27
+ # Prepare inputs for the model
28
  text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
29
+ # print(text_input)
30
+ # import pdb; pdb.set_trace()
31
  image_inputs, video_inputs = process_vision_info(messages)
 
32
  inputs = processor(
33
  text=[text_input],
34
  images=image_inputs,
 
36
  padding=True,
37
  return_tensors="pt",
38
  )
39
+ inputs = inputs.to(model.device)
40
 
41
+ streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
42
+ generation_kwargs = dict(
43
  **inputs,
44
+ streamer=streamer,
45
  max_new_tokens=4096,
46
  top_p=0.001,
47
  top_k=1,
48
  temperature=0.01,
49
  repetition_penalty=1.0,
50
  )
51
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
52
+ thread.start()
53
+ generated_text = ''
54
 
55
+ try:
56
+ for new_text in streamer:
57
+ generated_text += new_text
58
+ yield f"‎{generated_text}"
59
+ # print(f"Current text: {generated_text}") # 调试输出
60
+ # yield generated_text # 直接输出原始文本
61
+ except Exception as e:
62
+ print(f"Error: {e}")
63
+ yield f"Error occurred: {str(e)}"
64
+
65
+ Css = """
66
+ #output-markdown {
67
+ overflow-y: auto;
68
+ white-space: pre-wrap;
69
+ word-wrap: break-word;
70
+ }
71
+
72
+ #output-markdown .math {
73
+ overflow-x: auto;
74
+ max-width: 100%;
75
+ }
76
+ .markdown-text {
77
+ white-space: pre-wrap;
78
+ word-wrap: break-word;
79
+ }
80
+ #qwen-md .katex-display { display: inline; }
81
+ #qwen-md .katex-display>.katex { display: inline; }
82
+ #qwen-md .katex-display>.katex>.katex-html { display: inline; }
83
+ """
84
 
85
  # UI 组件
86
+ with gr.Blocks(css=Css) as demo:
87
  gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
88
 
89
  with gr.Row():
90
  with gr.Column():
91
+ input_image = gr.Image(type="pil", label="Upload"),
92
+ input_text = gr.Textbox(label="input your question")
93
  with gr.Row():
94
+ with gr.Column():
95
+ clear_btn = gr.ClearButton([*input_image, input_text])
96
+ with gr.Column():
97
+ submit_btn = gr.Button("Submit", variant="primary")
98
 
99
  with gr.Column():
100
+ output_text = gr.Markdown(
101
+ label="Generated Response",
102
+ max_height="80vh",
103
+ min_height="50vh",
104
+ container=True,
105
+ latex_delimiters=[{
106
+ "left": "\\(",
107
+ "right": "\\)",
108
+ "display": True
109
+ }, {
110
+ "left": "\\begin\{equation\}",
111
+ "right": "\\end\{equation\}",
112
+ "display": True
113
+ }, {
114
+ "left": "\\begin\{align\}",
115
+ "right": "\\end\{align\}",
116
+ "display": True
117
+ }, {
118
+ "left": "\\begin\{alignat\}",
119
+ "right": "\\end\{alignat\}",
120
+ "display": True
121
+ }, {
122
+ "left": "\\begin\{gather\}",
123
+ "right": "\\end\{gather\}",
124
+ "display": True
125
+ }, {
126
+ "left": "\\begin\{CD\}",
127
+ "right": "\\end\{CD\}",
128
+ "display": True
129
+ }, {
130
+ "left": "\\[",
131
+ "right": "\\]",
132
+ "display": True
133
+ }],
134
+ elem_id="qwen-md")
135
+
136
 
 
 
137
 
138
+ submit_btn.click(
139
+ fn=generate_output,
140
+ inputs=[*input_image, input_text],
141
+ outputs=output_text,
142
+ queue=True
143
+ )
144
+ demo.launch(share=True)
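A side note on the decoding arguments: top_k=1 combined with temperature=0.01 and top_p=0.001 restricts every step to the single most probable token, which is greedy decoding spelled as sampling. If deterministic output is the intent, a simpler drop-in for the generation_kwargs block would be the following sketch (a hypothetical simplification, not part of this commit):

# Request greedy decoding explicitly instead of emulating it with
# near-deterministic sampling parameters; repetition_penalty=1.0 is a
# no-op and can simply be dropped.
generation_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=4096,
    do_sample=False,  # always take the argmax token at each step
)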