aleafy committed on
Commit 99da57b · 1 Parent(s): edadb90

new readme

Files changed (42)
  1. .gitignore +2 -1
  2. README.md +5 -13
  3. __pycache__/db_examples.cpython-310.pyc +0 -0
  4. __pycache__/demo_utils1.cpython-310.pyc +0 -0
  5. app1_a.py +386 -0
  6. app1_bf.py +388 -0
  7. app1_bf2.py +388 -0
  8. db_examples.py +186 -55
  9. db_examples_bf.py +260 -0
  10. demo/clean_bg_extracted/47/cropped_video.mp4 +0 -0
  11. demo/clean_bg_extracted/57/cropped_video.mp4 +0 -0
  12. demo/clean_bg_extracted/58/cropped_video.mp4 +0 -0
  13. demo/clean_bg_extracted/62/cropped_video.mp4 +0 -0
  14. demo/clean_fg_extracted/0/cropped_video.mp4 +0 -0
  15. demo/clean_fg_extracted/0/frames/0000.png +0 -0
  16. demo/clean_fg_extracted/14/frames/0000.png +0 -0
  17. demo/clean_fg_extracted/14/frames/0000_rmbg.png +0 -0
  18. demo/clean_fg_extracted/15/frames/0000.png +0 -0
  19. demo/clean_fg_extracted/15/frames/0000_rmbg.png +0 -0
  20. demo/clean_fg_extracted/18/frames/0000.png +0 -0
  21. demo/clean_fg_extracted/18/frames/0000_rmbg.png +0 -0
  22. demo/clean_fg_extracted/22/frames/0000.png +0 -0
  23. demo/clean_fg_extracted/22/frames/0000_rmbg.png +0 -0
  24. demo/clean_fg_extracted/9/frames/0000.png +0 -0
  25. demo/clean_fg_extracted/9/frames/0000_rmbg.png +0 -0
  26. demo_utils1.py +6 -0
  27. misc_utils/__pycache__/train_utils.cpython-310.pyc +0 -0
  28. modules/openclip/__pycache__/modules.cpython-310.pyc +0 -0
  29. pl_trainer/__pycache__/instruct_p2p_video.cpython-310.pyc +0 -0
  30. pl_trainer/inference/__pycache__/inference.cpython-310.pyc +0 -0
  31. static_fg_sync_bg_visualization_fy/14_22_100fps.png +0 -0
  32. static_fg_sync_bg_visualization_fy/14_55_100fps.png +0 -0
  33. static_fg_sync_bg_visualization_fy/15_27_100fps.png +0 -0
  34. static_fg_sync_bg_visualization_fy/18_23_100fps.png +0 -0
  35. static_fg_sync_bg_visualization_fy/18_33_100fps.png +0 -0
  36. static_fg_sync_bg_visualization_fy/22_39_100fps.png +0 -0
  37. static_fg_sync_bg_visualization_fy/22_59_100fps.png +0 -0
  38. static_fg_sync_bg_visualization_fy/9_10_100fps.png +0 -0
  39. static_fg_sync_bg_visualization_fy/9_14_100fps.png +0 -0
  40. static_fg_sync_bg_visualization_fy/9_8_100fps.png +0 -0
  41. static_fg_sync_bg_visualization_fy/9_9_100fps.png +0 -0
  42. tmp.py +483 -0
.gitignore CHANGED
@@ -2,4 +2,5 @@ app1.py
 app2.py
 demo_utils1.py
 tmp
-models
+models
+stablediffusionapi
README.md CHANGED
@@ -1,13 +1,3 @@
----
-title: "RelightVid"
-emoji: "💡"
-colorFrom: "blue"
-colorTo: "green"
-sdk: "gradio" # the SDK your project uses (gradio / streamlit / docker)
-app_file: "app.py" # your main program file
----
-
-
 <!-- # <img src="assets/icon.png" style="vertical-align: -14px;" :height="50px" width="50px"> RelightVid -->
 # RelightVid
@@ -38,11 +28,13 @@
 
 ## 📜 News
-🚀 [2024/6/8] We release our [inference pipeline of Make-it-Real](#⚡-quick-start), including material matching and generation of albedo-only 3D objects.
-
-🚀 [2024/6/8] [Material library annotations](#📦-data-preparation) generated by GPT-4V and [data engine](#⚡-quick-start) are released!
-
-🚀 [2024/4/26] The [paper](https://arxiv.org/abs/2404.16829) and [project page](https://sunzey.github.io/Make-it-Real) are released!
+<!-- 🚀 [2024/6/8] We release our [inference pipeline of Make-it-Real](#⚡-quick-start), including material matching and generation of albedo-only 3D objects.
+
+🚀 [2024/6/8] [Material library annotations](#📦-data-preparation) generated by GPT-4V and [data engine](#⚡-quick-start) are released! -->
+
+[2025/3/12] The [inference code](xxx), [project page](xxx) and [huggingface demo](xxx) are released!
+
+[2025/1/27] We release the [paper](https://arxiv.org/abs/2501.16330) of RelightVid!
 
 ## 💡 Highlights
 - 🔥 We first demonstrate that **GPT-4V** can effectively **recognize and describe materials**, allowing our model to precisely identify and align materials with the corresponding components of 3D objects.
__pycache__/db_examples.cpython-310.pyc CHANGED
Binary files a/__pycache__/db_examples.cpython-310.pyc and b/__pycache__/db_examples.cpython-310.pyc differ
 
__pycache__/demo_utils1.cpython-310.pyc CHANGED
Binary files a/__pycache__/demo_utils1.cpython-310.pyc and b/__pycache__/demo_utils1.cpython-310.pyc differ
 
app1_a.py ADDED
@@ -0,0 +1,386 @@
+import os
+import gradio as gr
+import numpy as np
+from enum import Enum
+import db_examples
+import cv2
+
+from demo_utils1 import *
+
+from misc_utils.train_utils import unit_test_create_model
+from misc_utils.image_utils import save_tensor_to_gif, save_tensor_to_images
+from PIL import Image
+import torch
+import torchvision
+from torchvision import transforms
+from einops import rearrange
+import imageio
+import time
+
+from torchvision.transforms import functional as F
+from torch.hub import download_url_to_file
+
+# Inference setup
+from pl_trainer.inference.inference import InferenceIP2PVideo
+from tqdm import tqdm
+
+# if not os.path.exists(filename):
+#     original_path = os.getcwd()
+#     base_path = './models'
+#     os.makedirs(base_path, exist_ok=True)
+
+#     # Token written directly into the code (note the security risk)
+#     GIT_TOKEN = "955b8ea91095840b76fe38b90a088c200d4c813c"
+#     repo_url = f"https://YeFang:{GIT_TOKEN}@code.openxlab.org.cn/YeFang/RIV_models.git"
+
+#     try:
+#         if os.system(f'git clone {repo_url} {base_path}') != 0:
+#             raise RuntimeError("Git clone failed")
+#         os.chdir(base_path)
+#         if os.system('git lfs pull') != 0:
+#             raise RuntimeError("Git LFS pull failed")
+#     finally:
+#         os.chdir(original_path)
+
+def tensor_to_pil_image(x):
+    """
+    Convert a 4D PyTorch tensor into a PIL image.
+    """
+    x = x.float()  # make sure the tensor is float
+    grid_img = torchvision.utils.make_grid(x, nrow=4).permute(1, 2, 0).detach().cpu().numpy()
+    grid_img = (grid_img * 255).clip(0, 255).astype("uint8")  # map [0, 1] to [0, 255]
+    return Image.fromarray(grid_img)
+
+def frame_to_batch(x):
+    """
+    Fold the frame dimension into the batch dimension.
+    """
+    return rearrange(x, 'b f c h w -> (b f) c h w')
+
+def clip_image(x, min=0., max=1.):
+    """
+    Clamp an image tensor to the given minimum and maximum.
+    """
+    return torch.clamp(x, min=min, max=max)
+
+def unnormalize(x):
+    """
+    Map a tensor from [-1, 1] to [0, 1].
+    """
+    return (x + 1) / 2
+
+
+# Read image files
+def read_images_from_directory(directory, num_frames=16):
+    images = []
+    for i in range(num_frames):
+        img_path = os.path.join(directory, f'{i:04d}.png')
+        img = imageio.imread(img_path)
+        images.append(torch.tensor(img).permute(2, 0, 1))  # convert to a (C, H, W) tensor
+    return images
+
+def load_and_process_images(folder_path):
+    """
+    Read every image in the folder, convert each to a tensor in [-1, 1],
+    and return a 4D tensor.
+    """
+    processed_images = []
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Lambda(lambda x: x * 2 - 1)  # map [0, 1] to [-1, 1]
+    ])
+    for filename in sorted(os.listdir(folder_path)):
+        if filename.endswith(".png"):
+            img_path = os.path.join(folder_path, filename)
+            image = Image.open(img_path).convert("RGB")
+            processed_image = transform(image)
+            processed_images.append(processed_image)
+    return torch.stack(processed_images)  # return a 4D tensor
+
+def load_and_process_video(video_path, num_frames=16, crop_size=512):
+    """
+    Read the first num_frames frames of a video, convert each frame to a tensor
+    in [-1, 1], center-crop to crop_size x crop_size, and return a 4D tensor.
+    """
+    processed_frames = []
+    transform = transforms.Compose([
+        transforms.CenterCrop(crop_size),  # center crop
+        transforms.ToTensor(),
+        transforms.Lambda(lambda x: x * 2 - 1)  # map [0, 1] to [-1, 1]
+    ])
+
+    # Read the video with OpenCV
+    cap = cv2.VideoCapture(video_path)
+
+    if not cap.isOpened():
+        raise ValueError(f"Cannot open video file: {video_path}")
+
+    frame_count = 0
+
+    while frame_count < num_frames:
+        ret, frame = cap.read()
+        if not ret:
+            break  # end of video, or fewer frames than requested
+
+        # Convert to RGB
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(frame)
+
+        # Apply the transform
+        processed_frame = transform(image)
+        processed_frames.append(processed_frame)
+
+        frame_count += 1
+
+    cap.release()  # release the video handle
+
+    if len(processed_frames) < num_frames:
+        raise ValueError(f"Video has fewer than {num_frames} frames; only {len(processed_frames)} found.")
+
+    return torch.stack(processed_frames)  # return a 4D tensor (frames, channels, height, width)
+
+
+def clear_cache(output_path):
+    if os.path.exists(output_path):
+        os.remove(output_path)
+    return None
+
+
+#! Load the model
+# Config path and model creation
+config_path = 'configs/instruct_v2v_ic_gradio.yaml'
+diffusion_model = unit_test_create_model(config_path)
+diffusion_model = diffusion_model.to('cuda')
+
+# Load the model checkpoint
+# ckpt_path = 'models/relvid_mm_sd15_fbc_unet.pth' #! change
+# ckpt_path = 'tmp/pytorch_model.bin'
+# Download the weights
+
+os.makedirs('models', exist_ok=True)
+model_path = "models/relvid_mm_sd15_fbc_unet.pth"
+
+if not os.path.exists(model_path):
+    download_url_to_file(url='https://huggingface.co/aleafy/RelightVid/resolve/main/relvid_mm_sd15_fbc_unet.pth', dst=model_path)
+
+ckpt = torch.load(model_path, map_location='cpu')
+diffusion_model.load_state_dict(ckpt, strict=False)
+
+# Change the global temp directory
+new_tmp_dir = "./demo/gradio_bg"
+os.makedirs(new_tmp_dir, exist_ok=True)
+
+def save_video_from_frames(image_pred, save_pth, fps=8):
+    """
+    Save the frames in image_pred as a video file.
+
+    Args:
+    - image_pred: tensor of shape (1, 16, 3, 512, 512)
+    - save_pth: output video path, e.g. "output_video.mp4"
+    - fps: frame rate of the video
+    """
+    # Video parameters
+    num_frames = image_pred.shape[1]
+    frame_height, frame_width = 512, 512  # target size
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # mp4 codec
+
+    # Create the VideoWriter
+    out = cv2.VideoWriter(save_pth, fourcc, fps, (frame_width, frame_height))
+
+    for i in range(num_frames):
+        # Unnormalize and map to the 0-255 range
+        pred_frame = clip_image(unnormalize(image_pred[0][i].unsqueeze(0))) * 255
+        pred_frame_resized = pred_frame.squeeze(0).detach().cpu()  # (3, 512, 512)
+        pred_frame_resized = pred_frame_resized.permute(1, 2, 0).numpy().astype("uint8")  # (512, 512, 3)
+
+        # Resize to the target size
+        pred_frame_resized = cv2.resize(pred_frame_resized, (frame_width, frame_height))
+
+        # RGB to BGR (OpenCV uses BGR)
+        pred_frame_bgr = cv2.cvtColor(pred_frame_resized, cv2.COLOR_RGB2BGR)
+
+        # Write the frame to the video
+        out.write(pred_frame_bgr)
+
+    # Release the VideoWriter
+    out.release()
+    print(f"Video saved to {save_pth}")
+
+
+inf_pipe = InferenceIP2PVideo(
+    diffusion_model.unet,
+    scheduler='ddpm',
+    num_ddim_steps=20
+)
+
+# Processing entry point (originally a blank-video placeholder; the stub is kept, commented out, below)
+def dummy_process(input_fg, input_bg):
+    diffusion_model.to(torch.float16)
+    fg_tensor = load_and_process_video(input_fg).cuda().unsqueeze(0).to(dtype=torch.float16)
+    bg_tensor = load_and_process_video(input_bg).cuda().unsqueeze(0).to(dtype=torch.float16)  # (1, 16, 3, 512, 512)
+
+    cond_fg_tensor = diffusion_model.encode_image_to_latent(fg_tensor)  # (1, 16, 4, 64, 64)
+    cond_bg_tensor = diffusion_model.encode_image_to_latent(bg_tensor)
+    cond_tensor = torch.cat((cond_fg_tensor, cond_bg_tensor), dim=2)
+
+    # Initialize the latent
+    init_latent = torch.randn_like(cond_fg_tensor)
+
+    EDIT_PROMPT = 'change the background'
+    VIDEO_CFG = 1.2
+    TEXT_CFG = 7.5
+    text_cond = diffusion_model.encode_text([EDIT_PROMPT])  # (1, 77, 768)
+    text_uncond = diffusion_model.encode_text([''])
+    # Cast everything to float16
+    print('------------to float 16----------------')
+    init_latent, text_cond, text_uncond, cond_tensor = (
+        init_latent.to(dtype=torch.float16),
+        text_cond.to(dtype=torch.float16),
+        text_uncond.to(dtype=torch.float16),
+        cond_tensor.to(dtype=torch.float16)
+    )
+    inf_pipe.unet.to(torch.float16)
+    latent_pred = inf_pipe(
+        latent=init_latent,
+        text_cond=text_cond,
+        text_uncond=text_uncond,
+        img_cond=cond_tensor,
+        text_cfg=TEXT_CFG,
+        img_cfg=VIDEO_CFG,
+    )['latent']
+
+    image_pred = diffusion_model.decode_latent_to_image(latent_pred)  # (1, 16, 3, 512, 512)
+    output_path = os.path.join(new_tmp_dir, f"output_{int(time.time())}.mp4")
+
+    save_video_from_frames(image_pred, output_path)
+
+    # # Generate a simple black video as a placeholder
+    # output_path = os.path.join(new_tmp_dir, "output.mp4")
+    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    # out = cv2.VideoWriter(output_path, fourcc, 20.0, (512, 512))
+    # for _ in range(60):  # 3 seconds of video at 20 fps
+    #     frame = np.zeros((512, 512, 3), dtype=np.uint8)
+    #     out.write(frame)
+    # out.release()
+    torch.cuda.empty_cache()
+
+    return output_path
+
+# Enum for background selection
+class BGSource(Enum):
+    UPLOAD = "Use Background Video"
+    UPLOAD_FLIP = "Use Flipped Background Video"
+    UPLOAD_REVERSE = "Use Reversed Background Video"
+
+
+# Quick prompt examples
+quick_prompts = [
+    'beautiful woman',
+    'handsome man',
+    'beautiful woman, cinematic lighting',
+    'handsome man, cinematic lighting',
+    'beautiful woman, natural lighting',
+    'handsome man, natural lighting',
+    'beautiful woman, neo punk lighting, cyberpunk',
+    'handsome man, neo punk lighting, cyberpunk',
+]
+quick_prompts = [[x] for x in quick_prompts]
+
+# Gradio UI layout
+block = gr.Blocks().queue()
+with block:
+    with gr.Row():
+        gr.Markdown("## IC-Light (Relighting with Foreground and Background Video Condition)")
+
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                input_fg = gr.Video(label="Foreground Video", height=370, width=370, visible=True)
+                input_bg = gr.Video(label="Background Video", height=370, width=370, visible=True)
+
+            prompt = gr.Textbox(label="Prompt")
+            bg_source = gr.Radio(choices=[e.value for e in BGSource],
+                                 value=BGSource.UPLOAD.value,
+                                 label="Background Source", type='value')
+
+            example_prompts = gr.Dataset(samples=quick_prompts, label='Prompt Quick List', components=[prompt])
+            bg_gallery = gr.Gallery(height=450, object_fit='contain', label='Background Quick List', value=db_examples.bg_samples, columns=5, allow_preview=False)
+            relight_button = gr.Button(value="Relight")
+
+            with gr.Group():
+                with gr.Row():
+                    num_samples = gr.Slider(label="Videos", minimum=1, maximum=12, value=1, step=1)
+                    seed = gr.Number(label="Seed", value=12345, precision=0)
+                with gr.Row():
+                    video_width = gr.Slider(label="Video Width", minimum=256, maximum=1024, value=512, step=64)
+                    video_height = gr.Slider(label="Video Height", minimum=256, maximum=1024, value=640, step=64)
+
+            with gr.Accordion("Advanced options", open=False):
+                steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
+                cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=7.0, step=0.01)
+                highres_scale = gr.Slider(label="Highres Scale", minimum=1.0, maximum=3.0, value=1.5, step=0.01)
+                highres_denoise = gr.Slider(label="Highres Denoise", minimum=0.1, maximum=0.9, value=0.5, step=0.01)
+                a_prompt = gr.Textbox(label="Added Prompt", value='best quality')
+                n_prompt = gr.Textbox(label="Negative Prompt", value='lowres, bad anatomy, bad hands, cropped, worst quality')
+                normal_button = gr.Button(value="Compute Normal (4x Slower)")
+
+        with gr.Column():
+            result_video = gr.Video(label='Output Video', height=600, width=600, visible=True)
+            fg_gallery = gr.Gallery(width=600, object_fit='contain', label='Foreground Quick List', value=db_examples.fg_samples, columns=4, allow_preview=False)  # foreground example frames
+
+    # Input list
+    # ips = [input_fg, input_bg, prompt, video_width, video_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, bg_source]
+    ips = [input_fg, input_bg]
+
+    # Bind the handlers to the buttons
+    relight_button.click(fn=dummy_process, inputs=ips, outputs=[result_video])
+
+    normal_button.click(fn=dummy_process, inputs=ips, outputs=[result_video])
+
+    # Background gallery selection
+    def bg_gallery_selected(gal, evt: gr.SelectData):
+        img_path = db_examples.bg_samples[evt.index]
+        video_path = img_path.replace('frames/0000.png', 'cropped_video.mp4')
+        return video_path
+
+    bg_gallery.select(bg_gallery_selected, inputs=bg_gallery, outputs=input_bg)
+
+    # Examples
+    gr.Examples(
+        fn=lambda *args: args[-1],
+        examples=db_examples.background_conditioned_examples,
+        inputs=[input_fg, input_bg, prompt, bg_source, video_width, video_height, seed, result_video],
+        outputs=[result_video],
+        run_on_click=True, examples_per_page=1024
+    )
+
+# Launch the Gradio app
+block.launch(server_name='0.0.0.0', server_port=10003, share=True)
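
For orientation, a minimal, self-contained sketch (not part of the commit; the toy tensor sizes are assumptions) of the [-1, 1] to [0, 1] frame convention that load_and_process_video, unnormalize, clip_image, and save_video_from_frames above agree on:

# Sketch only: mirrors the unnormalize()/clip_image() helpers defined in app1_a.py.
import torch

def unnormalize(x):
    # map [-1, 1] (what the loaders emit) back to [0, 1]
    return (x + 1) / 2

def clip_image(x, min=0., max=1.):
    return torch.clamp(x, min=min, max=max)

# Stand-in for load_and_process_video(...).unsqueeze(0): (batch, frames, C, H, W)
frames = torch.rand(1, 16, 3, 64, 64) * 2 - 1
first = clip_image(unnormalize(frames[0, 0]))          # one frame, values in [0, 1]
hwc = (first.permute(1, 2, 0) * 255).to(torch.uint8)   # HWC uint8, the layout cv2.VideoWriter consumes
assert hwc.shape == (64, 64, 3)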
app1_bf.py ADDED
@@ -0,0 +1,388 @@
+import os
+import gradio as gr
+import numpy as np
+from enum import Enum
+import db_examples
+import cv2
+
+from demo_utils1 import *
+
+from misc_utils.train_utils import unit_test_create_model
+from misc_utils.image_utils import save_tensor_to_gif, save_tensor_to_images
+from PIL import Image
+import torch
+import torchvision
+from torchvision import transforms
+from einops import rearrange
+import imageio
+import time
+
+from torchvision.transforms import functional as F
+from torch.hub import download_url_to_file
+
+# Inference setup
+from pl_trainer.inference.inference import InferenceIP2PVideo
+from tqdm import tqdm
+
+# if not os.path.exists(filename):
+#     original_path = os.getcwd()
+#     base_path = './models'
+#     os.makedirs(base_path, exist_ok=True)
+
+#     # Token written directly into the code (note the security risk)
+#     GIT_TOKEN = "955b8ea91095840b76fe38b90a088c200d4c813c"
+#     repo_url = f"https://YeFang:{GIT_TOKEN}@code.openxlab.org.cn/YeFang/RIV_models.git"
+
+#     try:
+#         if os.system(f'git clone {repo_url} {base_path}') != 0:
+#             raise RuntimeError("Git clone failed")
+#         os.chdir(base_path)
+#         if os.system('git lfs pull') != 0:
+#             raise RuntimeError("Git LFS pull failed")
+#     finally:
+#         os.chdir(original_path)
+
+def tensor_to_pil_image(x):
+    """
+    Convert a 4D PyTorch tensor into a PIL image.
+    """
+    x = x.float()  # make sure the tensor is float
+    grid_img = torchvision.utils.make_grid(x, nrow=4).permute(1, 2, 0).detach().cpu().numpy()
+    grid_img = (grid_img * 255).clip(0, 255).astype("uint8")  # map [0, 1] to [0, 255]
+    return Image.fromarray(grid_img)
+
+def frame_to_batch(x):
+    """
+    Fold the frame dimension into the batch dimension.
+    """
+    return rearrange(x, 'b f c h w -> (b f) c h w')
+
+def clip_image(x, min=0., max=1.):
+    """
+    Clamp an image tensor to the given minimum and maximum.
+    """
+    return torch.clamp(x, min=min, max=max)
+
+def unnormalize(x):
+    """
+    Map a tensor from [-1, 1] to [0, 1].
+    """
+    return (x + 1) / 2
+
+
+# Read image files
+def read_images_from_directory(directory, num_frames=16):
+    images = []
+    for i in range(num_frames):
+        img_path = os.path.join(directory, f'{i:04d}.png')
+        img = imageio.imread(img_path)
+        images.append(torch.tensor(img).permute(2, 0, 1))  # convert to a (C, H, W) tensor
+    return images
+
+def load_and_process_images(folder_path):
+    """
+    Read every image in the folder, convert each to a tensor in [-1, 1],
+    and return a 4D tensor.
+    """
+    processed_images = []
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Lambda(lambda x: x * 2 - 1)  # map [0, 1] to [-1, 1]
+    ])
+    for filename in sorted(os.listdir(folder_path)):
+        if filename.endswith(".png"):
+            img_path = os.path.join(folder_path, filename)
+            image = Image.open(img_path).convert("RGB")
+            processed_image = transform(image)
+            processed_images.append(processed_image)
+    return torch.stack(processed_images)  # return a 4D tensor
+
+def load_and_process_video(video_path, num_frames=16, crop_size=512):
+    """
+    Read the first num_frames frames of a video, convert each frame to a tensor
+    in [-1, 1], center-crop to crop_size x crop_size, and return a 4D tensor.
+    """
+    processed_frames = []
+    transform = transforms.Compose([
+        transforms.CenterCrop(crop_size),  # center crop
+        transforms.ToTensor(),
+        transforms.Lambda(lambda x: x * 2 - 1)  # map [0, 1] to [-1, 1]
+    ])
+
+    # Read the video with OpenCV
+    cap = cv2.VideoCapture(video_path)
+
+    if not cap.isOpened():
+        raise ValueError(f"Cannot open video file: {video_path}")
+
+    frame_count = 0
+
+    while frame_count < num_frames:
+        ret, frame = cap.read()
+        if not ret:
+            break  # end of video, or fewer frames than requested
+
+        # Convert to RGB
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(frame)
+
+        # Apply the transform
+        processed_frame = transform(image)
+        processed_frames.append(processed_frame)
+
+        frame_count += 1
+
+    cap.release()  # release the video handle
+
+    if len(processed_frames) < num_frames:
+        raise ValueError(f"Video has fewer than {num_frames} frames; only {len(processed_frames)} found.")
+
+    return torch.stack(processed_frames)  # return a 4D tensor (frames, channels, height, width)
+
+
+def clear_cache(output_path):
+    if os.path.exists(output_path):
+        os.remove(output_path)
+    return None
+
+
+#! Load the model
+# Config path and model creation
+config_path = 'configs/instruct_v2v_ic_gradio.yaml'
+diffusion_model = unit_test_create_model(config_path)
+diffusion_model = diffusion_model.to('cuda')
+
+# Load the model checkpoint
+# ckpt_path = 'models/relvid_mm_sd15_fbc_unet.pth' #! change
+# ckpt_path = 'tmp/pytorch_model.bin'
+# Download the weights
+
+os.makedirs('models', exist_ok=True)
+model_path = "models/relvid_mm_sd15_fbc_unet.pth"
+
+if not os.path.exists(model_path):
+    download_url_to_file(url='https://huggingface.co/aleafy/RelightVid/resolve/main/relvid_mm_sd15_fbc_unet.pth', dst=model_path)
+
+ckpt = torch.load(model_path, map_location='cpu')
+diffusion_model.load_state_dict(ckpt, strict=False)
+
+# Change the global temp directory
+new_tmp_dir = "./demo/gradio_bg"
+os.makedirs(new_tmp_dir, exist_ok=True)
+
+def save_video_from_frames(image_pred, save_pth, fps=8):
+    """
+    Save the frames in image_pred as a video file.
+
+    Args:
+    - image_pred: tensor of shape (1, 16, 3, 512, 512)
+    - save_pth: output video path, e.g. "output_video.mp4"
+    - fps: frame rate of the video
+    """
+    # Video parameters
+    num_frames = image_pred.shape[1]
+    frame_height, frame_width = 512, 512  # target size
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # mp4 codec
+
+    # Create the VideoWriter
+    out = cv2.VideoWriter(save_pth, fourcc, fps, (frame_width, frame_height))
+
+    for i in range(num_frames):
+        # Unnormalize and map to the 0-255 range
+        pred_frame = clip_image(unnormalize(image_pred[0][i].unsqueeze(0))) * 255
+        pred_frame_resized = pred_frame.squeeze(0).detach().cpu()  # (3, 512, 512)
+        pred_frame_resized = pred_frame_resized.permute(1, 2, 0).numpy().astype("uint8")  # (512, 512, 3)
+
+        # Resize to the target size
+        pred_frame_resized = cv2.resize(pred_frame_resized, (frame_width, frame_height))
+
+        # RGB to BGR (OpenCV uses BGR)
+        pred_frame_bgr = cv2.cvtColor(pred_frame_resized, cv2.COLOR_RGB2BGR)
+
+        # Write the frame to the video
+        out.write(pred_frame_bgr)
+
+    # Release the VideoWriter
+    out.release()
+    print(f"Video saved to {save_pth}")
+
+
+inf_pipe = InferenceIP2PVideo(
+    diffusion_model.unet,
+    scheduler='ddpm',
+    num_ddim_steps=20
+)
+
+# Processing entry point (originally a blank-video placeholder; the stub is kept, commented out, below)
+def dummy_process(input_fg, input_bg):
+    diffusion_model.to(torch.float16)
+    fg_tensor = load_and_process_video(input_fg).cuda().unsqueeze(0).to(dtype=torch.float16)
+    bg_tensor = load_and_process_video(input_bg).cuda().unsqueeze(0).to(dtype=torch.float16)  # (1, 16, 3, 512, 512)
+
+    cond_fg_tensor = diffusion_model.encode_image_to_latent(fg_tensor)  # (1, 16, 4, 64, 64)
+    cond_bg_tensor = diffusion_model.encode_image_to_latent(bg_tensor)
+    cond_tensor = torch.cat((cond_fg_tensor, cond_bg_tensor), dim=2)
+
+    # Initialize the latent
+    init_latent = torch.randn_like(cond_fg_tensor)
+
+    EDIT_PROMPT = 'change the background'
+    VIDEO_CFG = 1.2
+    TEXT_CFG = 7.5
+    text_cond = diffusion_model.encode_text([EDIT_PROMPT])  # (1, 77, 768)
+    text_uncond = diffusion_model.encode_text([''])
+    # Cast everything to float16
+    print('------------to float 16----------------')
+    init_latent, text_cond, text_uncond, cond_tensor = (
+        init_latent.to(dtype=torch.float16),
+        text_cond.to(dtype=torch.float16),
+        text_uncond.to(dtype=torch.float16),
+        cond_tensor.to(dtype=torch.float16)
+    )
+    inf_pipe.unet.to(torch.float16)
+    latent_pred = inf_pipe(
+        latent=init_latent,
+        text_cond=text_cond,
+        text_uncond=text_uncond,
+        img_cond=cond_tensor,
+        text_cfg=TEXT_CFG,
+        img_cfg=VIDEO_CFG,
+    )['latent']
+
+    image_pred = diffusion_model.decode_latent_to_image(latent_pred)  # (1, 16, 3, 512, 512)
+    output_path = os.path.join(new_tmp_dir, f"output_{int(time.time())}.mp4")
+
+    save_video_from_frames(image_pred, output_path)
+
+    # # Generate a simple black video as a placeholder
+    # output_path = os.path.join(new_tmp_dir, "output.mp4")
+    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    # out = cv2.VideoWriter(output_path, fourcc, 20.0, (512, 512))
+    # for _ in range(60):  # 3 seconds of video at 20 fps
+    #     frame = np.zeros((512, 512, 3), dtype=np.uint8)
+    #     out.write(frame)
+    # out.release()
+    torch.cuda.empty_cache()
+
+    return output_path
+
+# Enum for background selection
+class BGSource(Enum):
+    UPLOAD = "Use Background Video"
+    UPLOAD_FLIP = "Use Flipped Background Video"
+    LEFT = "Left Light"
+    RIGHT = "Right Light"
+    TOP = "Top Light"
+    BOTTOM = "Bottom Light"
+    GREY = "Ambient"
+
+# Quick prompt examples
+quick_prompts = [
+    'beautiful woman',
+    'handsome man',
+    'beautiful woman, cinematic lighting',
+    'handsome man, cinematic lighting',
+    'beautiful woman, natural lighting',
+    'handsome man, natural lighting',
+    'beautiful woman, neo punk lighting, cyberpunk',
+    'handsome man, neo punk lighting, cyberpunk',
+]
+quick_prompts = [[x] for x in quick_prompts]
+
+# Gradio UI layout
+block = gr.Blocks().queue()
+with block:
+    with gr.Row():
+        gr.Markdown("## IC-Light (Relighting with Foreground and Background Video Condition)")
+
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                input_fg = gr.Video(label="Foreground Video", height=370, width=370, visible=True)
+                input_bg = gr.Video(label="Background Video", height=370, width=370, visible=True)
+
+            prompt = gr.Textbox(label="Prompt")
+            bg_source = gr.Radio(choices=[e.value for e in BGSource],
+                                 value=BGSource.UPLOAD.value,
+                                 label="Background Source", type='value')
+
+            example_prompts = gr.Dataset(samples=quick_prompts, label='Prompt Quick List', components=[prompt])
+            bg_gallery = gr.Gallery(height=450, object_fit='contain', label='Background Quick List', value=db_examples.bg_samples, columns=5, allow_preview=False)
+            relight_button = gr.Button(value="Relight")
+
+            with gr.Group():
+                with gr.Row():
+                    num_samples = gr.Slider(label="Videos", minimum=1, maximum=12, value=1, step=1)
+                    seed = gr.Number(label="Seed", value=12345, precision=0)
+                with gr.Row():
+                    video_width = gr.Slider(label="Video Width", minimum=256, maximum=1024, value=512, step=64)
+                    video_height = gr.Slider(label="Video Height", minimum=256, maximum=1024, value=640, step=64)
+
+            with gr.Accordion("Advanced options", open=False):
+                steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
+                cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=7.0, step=0.01)
+                highres_scale = gr.Slider(label="Highres Scale", minimum=1.0, maximum=3.0, value=1.5, step=0.01)
+                highres_denoise = gr.Slider(label="Highres Denoise", minimum=0.1, maximum=0.9, value=0.5, step=0.01)
+                a_prompt = gr.Textbox(label="Added Prompt", value='best quality')
+                n_prompt = gr.Textbox(label="Negative Prompt", value='lowres, bad anatomy, bad hands, cropped, worst quality')
+                normal_button = gr.Button(value="Compute Normal (4x Slower)")
+
+        with gr.Column():
+            result_video = gr.Video(label='Output Video', height=600, width=600, visible=True)
+
+    # Input list
+    # ips = [input_fg, input_bg, prompt, video_width, video_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, bg_source]
+    ips = [input_fg, input_bg]
+
+    # Bind the handlers to the buttons
+    relight_button.click(fn=dummy_process, inputs=ips, outputs=[result_video])
+
+    normal_button.click(fn=dummy_process, inputs=ips, outputs=[result_video])
+
+    # Background gallery selection
+    def bg_gallery_selected(gal, evt: gr.SelectData):
+        img_path = db_examples.bg_samples[evt.index]
+        video_path = img_path.replace('frames/0000.png', 'cropped_video.mp4')
+        return video_path
+
+    bg_gallery.select(bg_gallery_selected, inputs=bg_gallery, outputs=input_bg)
+
+    # Examples
+    gr.Examples(
+        fn=lambda *args: args[-1],
+        examples=db_examples.background_conditioned_examples,
+        inputs=[input_fg, input_bg, prompt, bg_source, video_width, video_height, seed, result_video],
+        outputs=[result_video],
+        run_on_click=True, examples_per_page=1024
+    )
+
+# Launch the Gradio app
+block.launch(server_name='0.0.0.0', server_port=10002, share=True)
app1_bf2.py ADDED
@@ -0,0 +1,388 @@
+import os
+import gradio as gr
+import numpy as np
+from enum import Enum
+import db_examples
+import cv2
+
+from demo_utils1 import *
+
+from misc_utils.train_utils import unit_test_create_model
+from misc_utils.image_utils import save_tensor_to_gif, save_tensor_to_images
+from PIL import Image
+import torch
+import torchvision
+from torchvision import transforms
+from einops import rearrange
+import imageio
+import time
+
+from torchvision.transforms import functional as F
+from torch.hub import download_url_to_file
+
+# Inference setup
+from pl_trainer.inference.inference import InferenceIP2PVideo
+from tqdm import tqdm
+
+# if not os.path.exists(filename):
+#     original_path = os.getcwd()
+#     base_path = './models'
+#     os.makedirs(base_path, exist_ok=True)
+
+#     # Token written directly into the code (note the security risk)
+#     GIT_TOKEN = "955b8ea91095840b76fe38b90a088c200d4c813c"
+#     repo_url = f"https://YeFang:{GIT_TOKEN}@code.openxlab.org.cn/YeFang/RIV_models.git"
+
+#     try:
+#         if os.system(f'git clone {repo_url} {base_path}') != 0:
+#             raise RuntimeError("Git clone failed")
+#         os.chdir(base_path)
+#         if os.system('git lfs pull') != 0:
+#             raise RuntimeError("Git LFS pull failed")
+#     finally:
+#         os.chdir(original_path)
+
+def tensor_to_pil_image(x):
+    """
+    Convert a 4D PyTorch tensor into a PIL image.
+    """
+    x = x.float()  # make sure the tensor is float
+    grid_img = torchvision.utils.make_grid(x, nrow=4).permute(1, 2, 0).detach().cpu().numpy()
+    grid_img = (grid_img * 255).clip(0, 255).astype("uint8")  # map [0, 1] to [0, 255]
+    return Image.fromarray(grid_img)
+
+def frame_to_batch(x):
+    """
+    Fold the frame dimension into the batch dimension.
+    """
+    return rearrange(x, 'b f c h w -> (b f) c h w')
+
+def clip_image(x, min=0., max=1.):
+    """
+    Clamp an image tensor to the given minimum and maximum.
+    """
+    return torch.clamp(x, min=min, max=max)
+
+def unnormalize(x):
+    """
+    Map a tensor from [-1, 1] to [0, 1].
+    """
+    return (x + 1) / 2
+
+
+# Read image files
+def read_images_from_directory(directory, num_frames=16):
+    images = []
+    for i in range(num_frames):
+        img_path = os.path.join(directory, f'{i:04d}.png')
+        img = imageio.imread(img_path)
+        images.append(torch.tensor(img).permute(2, 0, 1))  # convert to a (C, H, W) tensor
+    return images
+
+def load_and_process_images(folder_path):
+    """
+    Read every image in the folder, convert each to a tensor in [-1, 1],
+    and return a 4D tensor.
+    """
+    processed_images = []
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Lambda(lambda x: x * 2 - 1)  # map [0, 1] to [-1, 1]
+    ])
+    for filename in sorted(os.listdir(folder_path)):
+        if filename.endswith(".png"):
+            img_path = os.path.join(folder_path, filename)
+            image = Image.open(img_path).convert("RGB")
+            processed_image = transform(image)
+            processed_images.append(processed_image)
+    return torch.stack(processed_images)  # return a 4D tensor
+
+def load_and_process_video(video_path, num_frames=16, crop_size=512):
+    """
+    Read the first num_frames frames of a video, convert each frame to a tensor
+    in [-1, 1], center-crop to crop_size x crop_size, and return a 4D tensor.
+    """
+    processed_frames = []
+    transform = transforms.Compose([
+        transforms.CenterCrop(crop_size),  # center crop
+        transforms.ToTensor(),
+        transforms.Lambda(lambda x: x * 2 - 1)  # map [0, 1] to [-1, 1]
+    ])
+
+    # Read the video with OpenCV
+    cap = cv2.VideoCapture(video_path)
+
+    if not cap.isOpened():
+        raise ValueError(f"Cannot open video file: {video_path}")
+
+    frame_count = 0
+
+    while frame_count < num_frames:
+        ret, frame = cap.read()
+        if not ret:
+            break  # end of video, or fewer frames than requested
+
+        # Convert to RGB
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(frame)
+
+        # Apply the transform
+        processed_frame = transform(image)
+        processed_frames.append(processed_frame)
+
+        frame_count += 1
+
+    cap.release()  # release the video handle
+
+    if len(processed_frames) < num_frames:
+        raise ValueError(f"Video has fewer than {num_frames} frames; only {len(processed_frames)} found.")
+
+    return torch.stack(processed_frames)  # return a 4D tensor (frames, channels, height, width)
+
+
+def clear_cache(output_path):
+    if os.path.exists(output_path):
+        os.remove(output_path)
+    return None
+
+
+#! Load the model
+# Config path and model creation
+config_path = 'configs/instruct_v2v_ic_gradio.yaml'
+diffusion_model = unit_test_create_model(config_path)
+diffusion_model = diffusion_model.to('cuda')
+
+# Load the model checkpoint
+# ckpt_path = 'models/relvid_mm_sd15_fbc_unet.pth' #! change
+# ckpt_path = 'tmp/pytorch_model.bin'
+# Download the weights
+
+os.makedirs('models', exist_ok=True)
+model_path = "models/relvid_mm_sd15_fbc_unet.pth"
+
+if not os.path.exists(model_path):
+    download_url_to_file(url='https://huggingface.co/aleafy/RelightVid/resolve/main/relvid_mm_sd15_fbc_unet.pth', dst=model_path)
+
+ckpt = torch.load(model_path, map_location='cpu')
+diffusion_model.load_state_dict(ckpt, strict=False)
+
+# Change the global temp directory
+new_tmp_dir = "./demo/gradio_bg"
+os.makedirs(new_tmp_dir, exist_ok=True)
+
+def save_video_from_frames(image_pred, save_pth, fps=8):
+    """
+    Save the frames in image_pred as a video file.
+
+    Args:
+    - image_pred: tensor of shape (1, 16, 3, 512, 512)
+    - save_pth: output video path, e.g. "output_video.mp4"
+    - fps: frame rate of the video
+    """
+    # Video parameters
+    num_frames = image_pred.shape[1]
+    frame_height, frame_width = 512, 512  # target size
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # mp4 codec
+
+    # Create the VideoWriter
+    out = cv2.VideoWriter(save_pth, fourcc, fps, (frame_width, frame_height))
+
+    for i in range(num_frames):
+        # Unnormalize and map to the 0-255 range
+        pred_frame = clip_image(unnormalize(image_pred[0][i].unsqueeze(0))) * 255
+        pred_frame_resized = pred_frame.squeeze(0).detach().cpu()  # (3, 512, 512)
+        pred_frame_resized = pred_frame_resized.permute(1, 2, 0).numpy().astype("uint8")  # (512, 512, 3)
+
+        # Resize to the target size
+        pred_frame_resized = cv2.resize(pred_frame_resized, (frame_width, frame_height))
+
+        # RGB to BGR (OpenCV uses BGR)
+        pred_frame_bgr = cv2.cvtColor(pred_frame_resized, cv2.COLOR_RGB2BGR)
+
+        # Write the frame to the video
+        out.write(pred_frame_bgr)
+
+    # Release the VideoWriter
+    out.release()
+    print(f"Video saved to {save_pth}")
+
+
+inf_pipe = InferenceIP2PVideo(
+    diffusion_model.unet,
+    scheduler='ddpm',
+    num_ddim_steps=20
+)
+
+# Processing entry point (originally a blank-video placeholder; the stub is kept, commented out, below)
+def dummy_process(input_fg, input_bg):
+    diffusion_model.to(torch.float16)
+    fg_tensor = load_and_process_video(input_fg).cuda().unsqueeze(0).to(dtype=torch.float16)
+    bg_tensor = load_and_process_video(input_bg).cuda().unsqueeze(0).to(dtype=torch.float16)  # (1, 16, 3, 512, 512)
+
+    cond_fg_tensor = diffusion_model.encode_image_to_latent(fg_tensor)  # (1, 16, 4, 64, 64)
+    cond_bg_tensor = diffusion_model.encode_image_to_latent(bg_tensor)
+    cond_tensor = torch.cat((cond_fg_tensor, cond_bg_tensor), dim=2)
+
+    # Initialize the latent
+    init_latent = torch.randn_like(cond_fg_tensor)
+
+    EDIT_PROMPT = 'change the background'
+    VIDEO_CFG = 1.2
+    TEXT_CFG = 7.5
+    text_cond = diffusion_model.encode_text([EDIT_PROMPT])  # (1, 77, 768)
+    text_uncond = diffusion_model.encode_text([''])
+    # Cast everything to float16
+    print('------------to float 16----------------')
+    init_latent, text_cond, text_uncond, cond_tensor = (
+        init_latent.to(dtype=torch.float16),
+        text_cond.to(dtype=torch.float16),
+        text_uncond.to(dtype=torch.float16),
+        cond_tensor.to(dtype=torch.float16)
+    )
+    inf_pipe.unet.to(torch.float16)
+    latent_pred = inf_pipe(
+        latent=init_latent,
+        text_cond=text_cond,
+        text_uncond=text_uncond,
+        img_cond=cond_tensor,
+        text_cfg=TEXT_CFG,
+        img_cfg=VIDEO_CFG,
+    )['latent']
+
+    image_pred = diffusion_model.decode_latent_to_image(latent_pred)  # (1, 16, 3, 512, 512)
+    output_path = os.path.join(new_tmp_dir, f"output_{int(time.time())}.mp4")
+
+    save_video_from_frames(image_pred, output_path)
+
+    # # Generate a simple black video as a placeholder
+    # output_path = os.path.join(new_tmp_dir, "output.mp4")
+    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    # out = cv2.VideoWriter(output_path, fourcc, 20.0, (512, 512))
+    # for _ in range(60):  # 3 seconds of video at 20 fps
+    #     frame = np.zeros((512, 512, 3), dtype=np.uint8)
+    #     out.write(frame)
+    # out.release()
+    torch.cuda.empty_cache()
+
+    return output_path
+
+# Enum for background selection
+class BGSource(Enum):
+    UPLOAD = "Use Background Video"
+    UPLOAD_FLIP = "Use Flipped Background Video"
+    UPLOAD_REVERSE = "Use Reversed Background Video"
+
+# Quick prompt examples
+quick_prompts = [
+    'beautiful woman',
+    'handsome man',
+    'beautiful woman, cinematic lighting',
+    'handsome man, cinematic lighting',
+    'beautiful woman, natural lighting',
+    'handsome man, natural lighting',
+    'beautiful woman, neo punk lighting, cyberpunk',
+    'handsome man, neo punk lighting, cyberpunk',
+]
+quick_prompts = [[x] for x in quick_prompts]
+
+# Gradio UI layout
+block = gr.Blocks().queue()
+with block:
+    with gr.Row():
+        gr.Markdown("## IC-Light (Relighting with Foreground and Background Video Condition)")
+
+    with gr.Row():
+        with gr.Column():
+            input_fg = gr.Video(label="Foreground Video", height=450, visible=True)
+        with gr.Column():
+            input_bg = gr.Video(label="Background Video", height=450, visible=True)
+        with gr.Column():
+            result_video = gr.Video(label='Output Video', height=450, visible=True)
+
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(label="Prompt")
+            bg_source = gr.Radio(choices=[e.value for e in BGSource],
+                                 value=BGSource.UPLOAD.value,
+                                 label="Background Source", type='value')
+
+            example_prompts = gr.Dataset(samples=quick_prompts, label='Prompt Quick List', components=[prompt])
+            bg_gallery = gr.Gallery(height=450, object_fit='contain', label='Background Quick List', value=db_examples.bg_samples, columns=5, allow_preview=False)
+            relight_button = gr.Button(value="Relight")
+
+            with gr.Group():
+                with gr.Row():
+                    num_samples = gr.Slider(label="Videos", minimum=1, maximum=12, value=1, step=1)
+                    seed = gr.Number(label="Seed", value=12345, precision=0)
+                with gr.Row():
+                    video_width = gr.Slider(label="Video Width", minimum=256, maximum=1024, value=512, step=64)
+                    video_height = gr.Slider(label="Video Height", minimum=256, maximum=1024, value=640, step=64)
+
+        with gr.Column():
+            with gr.Accordion("Advanced options", open=False):
+                steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
+                cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=7.0, step=0.01)
+                highres_scale = gr.Slider(label="Highres Scale", minimum=1.0, maximum=3.0, value=1.5, step=0.01)
+                highres_denoise = gr.Slider(label="Highres Denoise", minimum=0.1, maximum=0.9, value=0.5, step=0.01)
+                a_prompt = gr.Textbox(label="Added Prompt", value='best quality')
+                n_prompt = gr.Textbox(label="Negative Prompt", value='lowres, bad anatomy, bad hands, cropped, worst quality')
+                normal_button = gr.Button(value="Compute Normal (4x Slower)")
+
+    # Input list
+    # ips = [input_fg, input_bg, prompt, video_width, video_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, bg_source]
+    ips = [input_fg, input_bg]
+
+    # Bind the handlers to the buttons
+    relight_button.click(fn=dummy_process, inputs=ips, outputs=[result_video])
+
+    normal_button.click(fn=dummy_process, inputs=ips, outputs=[result_video])
+
+    # Background gallery selection
+    def bg_gallery_selected(gal, evt: gr.SelectData):
+        img_path = db_examples.bg_samples[evt.index]
+        video_path = img_path.replace('frames/0000.png', 'cropped_video.mp4')
+        return video_path
+
+    bg_gallery.select(bg_gallery_selected, inputs=bg_gallery, outputs=input_bg)

+    # Examples
+    gr.Examples(
+        fn=lambda *args: args[-1],
+        examples=db_examples.background_conditioned_examples,
+        inputs=[input_fg, input_bg, prompt, bg_source, video_width, video_height, seed, result_video],
+        outputs=[result_video],
+        run_on_click=True, examples_per_page=1024
+    )
+
+# Launch the Gradio app
+block.launch(server_name='0.0.0.0', server_port=10002, share=True)
db_examples.py CHANGED
@@ -17,117 +17,248 @@ bg_samples = [
17
  'demo/clean_bg_extracted/62/frames/0000.png'
18
  ] # 准备大概 15 个 background视频
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  background_conditioned_examples = [
22
  [
23
- "demo/clean_fg_extracted/14/cropped_video.mp4",
24
- "demo/clean_bg_extracted/22/cropped_video.mp4",
 
25
  "beautiful woman, cinematic lighting",
26
  "Use Background Video",
27
  512,
28
- 768,
29
- 12345,
30
- "static_fg_sync_bg_visualization_fy/14_22_100fps.mp4",
31
  ],
32
  [
33
- "demo/clean_fg_extracted/14/cropped_video.mp4",
34
- "demo/clean_bg_extracted/55/cropped_video.mp4",
 
35
  "beautiful woman, cinematic lighting",
36
  "Use Background Video",
37
  512,
38
- 768,
39
- 12345,
40
- "static_fg_sync_bg_visualization_fy/14_55_100fps.mp4",
41
  ],
42
  [
43
- "demo/clean_fg_extracted/15/cropped_video.mp4",
44
- "demo/clean_bg_extracted/27/cropped_video.mp4",
 
45
  "beautiful woman, cinematic lighting",
46
  "Use Background Video",
47
  512,
48
- 768,
49
- 12345,
50
- "static_fg_sync_bg_visualization_fy/15_27_100fps.mp4",
51
  ],
52
  [
53
- "demo/clean_fg_extracted/18/cropped_video.mp4",
54
- "demo/clean_bg_extracted/23/cropped_video.mp4",
 
55
  "beautiful woman, cinematic lighting",
56
  "Use Background Video",
57
  512,
58
- 768,
59
- 12345,
60
- "static_fg_sync_bg_visualization_fy/18_23_100fps.mp4",
61
  ],
62
  # [
63
- # "demo/clean_fg_extracted/18/cropped_video.mp4",
64
- # "demo/clean_bg_extracted/33/cropped_video.mp4",
65
  # "beautiful woman, cinematic lighting",
66
  # "Use Background Video",
67
  # 512,
68
- # 768,
69
- # 12345,
70
- # "static_fg_sync_bg_visualization_fy/18_33_100fps.mp4",
71
  # ],
72
  [
73
- "demo/clean_fg_extracted/22/cropped_video.mp4",
74
- "demo/clean_bg_extracted/39/cropped_video.mp4",
 
75
  "beautiful woman, cinematic lighting",
76
  "Use Background Video",
77
  512,
78
- 768,
79
- 12345,
80
- "static_fg_sync_bg_visualization_fy/22_39_100fps.mp4",
81
  ],
82
  # [
83
- # "demo/clean_fg_extracted/22/cropped_video.mp4",
84
- # "demo/clean_bg_extracted/59/cropped_video.mp4",
85
  # "beautiful woman, cinematic lighting",
86
  # "Use Background Video",
87
  # 512,
88
- # 768,
89
- # 12345,
90
- # "static_fg_sync_bg_visualization_fy/22_59_100fps.mp4",
91
  # ],
92
  [
93
- "demo/clean_fg_extracted/9/cropped_video.mp4",
94
- "demo/clean_bg_extracted/8/cropped_video.mp4",
 
95
  "beautiful woman, cinematic lighting",
96
  "Use Background Video",
97
  512,
98
- 768,
99
- 12345,
100
- "static_fg_sync_bg_visualization_fy/9_8_100fps.mp4",
101
  ],
102
  [
103
- "demo/clean_fg_extracted/9/cropped_video.mp4",
104
- "demo/clean_bg_extracted/9/cropped_video.mp4",
 
105
  "beautiful woman, cinematic lighting",
106
  "Use Background Video",
107
  512,
108
- 768,
109
- 12345,
110
- "static_fg_sync_bg_visualization_fy/9_9_100fps.mp4",
111
  ],
112
  [
113
- "demo/clean_fg_extracted/9/cropped_video.mp4",
114
- "demo/clean_bg_extracted/10/cropped_video.mp4",
 
115
  "beautiful woman, cinematic lighting",
116
  "Use Background Video",
117
  512,
118
- 768,
119
- 12345,
120
- "static_fg_sync_bg_visualization_fy/9_10_100fps.mp4",
121
  ],
122
  # [
123
- # "demo/clean_fg_extracted/9/cropped_video.mp4",
124
- # "demo/clean_bg_extracted/14/cropped_video.mp4",
125
  # "beautiful woman, cinematic lighting",
126
  # "Use Background Video",
127
  # 512,
128
- # 768,
129
- # 12345,
130
- # "static_fg_sync_bg_visualization_fy/9_14_100fps.mp4",
131
  # ],
132
 
133
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  'demo/clean_bg_extracted/62/frames/0000.png'
18
  ] # 准备大概 15 个 background视频
19
 
20
+ fg_samples = [
21
+ 'demo/clean_fg_extracted/14/frames/0000.png',
22
+ 'demo/clean_fg_extracted/15/frames/0000.png',
23
+ 'demo/clean_fg_extracted/18/frames/0000.png',
24
+ 'demo/clean_fg_extracted/22/frames/0000.png',
25
+ 'demo/clean_fg_extracted/9/frames/0000.png',
26
+ # 'demo/clean_bg_extracted/39/frames/0000.png',
27
+ # 'demo/clean_bg_extracted/59/frames/0000.png',
28
+ # 'demo/clean_bg_extracted/55/frames/0000.png',
29
+ # 'demo/clean_bg_extracted/58/frames/0000.png',
30
+ # 'demo/clean_bg_extracted/57/frames/0000.png', #42
31
+ # 'demo/clean_bg_extracted/8/frames/0000.png',
32
+ # 'demo/clean_bg_extracted/9/frames/0000.png',
33
+ # 'demo/clean_bg_extracted/10/frames/0000.png',
34
+ # 'demo/clean_bg_extracted/14/frames/0000.png',
35
+ # 'demo/clean_bg_extracted/62/frames/0000.png'
36
+ ] # 准备大概 15 个 background视频
37
+
38
 
39
  background_conditioned_examples = [
40
  [
41
+ 1,
42
+ "demo/clean_fg_extracted/14/frames/0000.png",
43
+ "demo/clean_bg_extracted/22/frames/0000.png",
44
  "beautiful woman, cinematic lighting",
45
  "Use Background Video",
46
  512,
47
+ 512,
48
+ "static_fg_sync_bg_visualization_fy/14_22_100fps.png",
 
49
  ],
50
  [
51
+ 2,
52
+ "demo/clean_fg_extracted/14/frames/0000.png",
53
+ "demo/clean_bg_extracted/55/frames/0000.png",
54
  "beautiful woman, cinematic lighting",
55
  "Use Background Video",
56
  512,
57
+ 512,
58
+ "static_fg_sync_bg_visualization_fy/14_55_100fps.png",
 
59
  ],
60
  [
61
+ 3,
62
+ "demo/clean_fg_extracted/15/frames/0000.png",
63
+ "demo/clean_bg_extracted/27/frames/0000.png",
64
  "beautiful woman, cinematic lighting",
65
  "Use Background Video",
66
  512,
67
+ 512,
68
+ "static_fg_sync_bg_visualization_fy/15_27_100fps.png",
 
69
  ],
70
  [
71
+ 4,
72
+ "demo/clean_fg_extracted/18/frames/0000.png",
73
+ "demo/clean_bg_extracted/33/frames/0000.png", # 23->33
74
  "beautiful woman, cinematic lighting",
75
  "Use Background Video",
76
  512,
77
+ 512,
78
+ "static_fg_sync_bg_visualization_fy/18_33_100fps.png",
 
79
  ],
80
  # [
81
+ # "demo/clean_fg_extracted/18/frames/0000.png",
82
+ # "demo/clean_bg_extracted/33/frames/0000.png",
83
  # "beautiful woman, cinematic lighting",
84
  # "Use Background Video",
85
  # 512,
86
+ # 512,
87
+ #
88
+ # "static_fg_sync_bg_visualization_fy/18_33_100fps.png",
89
  # ],
90
  [
91
+ 5,
92
+ "demo/clean_fg_extracted/22/frames/0000.png",
93
+ "demo/clean_bg_extracted/59/frames/0000.png", # 39 -> 59
94
  "beautiful woman, cinematic lighting",
95
  "Use Background Video",
96
  512,
97
+ 512,
98
+ "static_fg_sync_bg_visualization_fy/22_59_100fps.png",
 
99
  ],
100
  # [
101
+ # "demo/clean_fg_extracted/22/frames/0000.png",
102
+ # "demo/clean_bg_extracted/59/frames/0000.png",
103
  # "beautiful woman, cinematic lighting",
104
  # "Use Background Video",
105
  # 512,
106
+ # 512,
107
+ #
108
+ # "static_fg_sync_bg_visualization_fy/22_59_100fps.png",
109
  # ],
110
  [
111
+ 6,
112
+ "demo/clean_fg_extracted/9/frames/0000.png",
113
+ "demo/clean_bg_extracted/8/frames/0000.png",
114
  "beautiful woman, cinematic lighting",
115
  "Use Background Video",
116
  512,
117
+ 512,
118
+
119
+ "static_fg_sync_bg_visualization_fy/9_8_100fps.png",
120
  ],
121
  [
122
+ 7,
123
+ "demo/clean_fg_extracted/9/frames/0000.png",
124
+ "demo/clean_bg_extracted/9/frames/0000.png",
125
  "beautiful woman, cinematic lighting",
126
  "Use Background Video",
127
  512,
128
+ 512,
129
+ "static_fg_sync_bg_visualization_fy/9_9_100fps.png",
 
130
  ],
131
  [
132
+ 8,
133
+ "demo/clean_fg_extracted/9/frames/0000.png",
134
+ "demo/clean_bg_extracted/10/frames/0000.png",
135
  "beautiful woman, cinematic lighting",
136
  "Use Background Video",
137
  512,
138
+ 512,
139
+
140
+ "static_fg_sync_bg_visualization_fy/9_10_100fps.png",
141
  ],
142
  # [
143
+ # "demo/clean_fg_extracted/9/frames/0000.png",
144
+ # "demo/clean_bg_extracted/14/frames/0000.png",
145
  # "beautiful woman, cinematic lighting",
146
  # "Use Background Video",
147
  # 512,
148
+ # 512,
149
+ #
150
+ # "static_fg_sync_bg_visualization_fy/9_14_100fps.png",
151
  # ],
152
 
153
  ]
154
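Each row in `background_conditioned_examples` now carries a leading integer ID, followed by the foreground frame, background frame, prompt, background source, width, height, and a preview image; the demo later maps the frame paths back to the matching `cropped_video.mp4`. A minimal sketch of that lookup (editorial, not part of the commit; `resolve_example_row` is an illustrative name):

# Editorial sketch: resolve one example row back to the underlying videos,
# mirroring the frames/0000.png -> cropped_video.mp4 convention used by the demo.
import db_examples

def resolve_example_row(row):
    index, fg_frame, bg_frame, preview = row[0], row[1], row[2], row[-1]
    fg_video = fg_frame.replace("frames/0000.png", "cropped_video.mp4")
    bg_video = bg_frame.replace("frames/0000.png", "cropped_video.mp4")
    result_video = preview.replace(".png", ".mp4")
    return index, fg_video, bg_video, result_video

print(resolve_example_row(db_examples.background_conditioned_examples[0]))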
db_examples_bf.py ADDED
@@ -0,0 +1,260 @@
+
+ bg_samples = [
+     'demo/clean_bg_extracted/22/frames/0000.png',
+     'demo/clean_bg_extracted/23/frames/0000.png',
+     'demo/clean_bg_extracted/27/frames/0000.png',
+     'demo/clean_bg_extracted/33/frames/0000.png',
+     'demo/clean_bg_extracted/47/frames/0000.png',
+     'demo/clean_bg_extracted/39/frames/0000.png',
+     'demo/clean_bg_extracted/59/frames/0000.png',
+     'demo/clean_bg_extracted/55/frames/0000.png',
+     'demo/clean_bg_extracted/58/frames/0000.png',
+     'demo/clean_bg_extracted/57/frames/0000.png',  # 42
+     'demo/clean_bg_extracted/8/frames/0000.png',
+     'demo/clean_bg_extracted/9/frames/0000.png',
+     'demo/clean_bg_extracted/10/frames/0000.png',
+     'demo/clean_bg_extracted/14/frames/0000.png',
+     'demo/clean_bg_extracted/62/frames/0000.png'
+ ]  # roughly 15 background videos to prepare
+
+ fg_samples = [
+     'demo/clean_fg_extracted/14/frames/0000.png',
+     'demo/clean_fg_extracted/15/frames/0000.png',
+     'demo/clean_fg_extracted/18/frames/0000.png',
+     'demo/clean_fg_extracted/9/frames/0000.png',
+     'demo/clean_fg_extracted/22/frames/0000.png',
+ ]  # foreground videos to prepare
+
+
+ background_conditioned_examples = [
+     [
+         "demo/clean_fg_extracted/14/cropped_video.mp4",
+         "demo/clean_bg_extracted/22/cropped_video.mp4",
+         "beautiful woman, cinematic lighting",
+         "Use Background Video",
+         512,
+         512,
+         "static_fg_sync_bg_visualization_fy/14_22_100fps.mp4",
+     ],
+     [
+         "demo/clean_fg_extracted/14/cropped_video.mp4",
+         "demo/clean_bg_extracted/55/cropped_video.mp4",
+         "beautiful woman, cinematic lighting",
+         "Use Background Video",
+         512,
+         512,
+         "static_fg_sync_bg_visualization_fy/14_55_100fps.mp4",
+     ],
+     [
+         "demo/clean_fg_extracted/15/cropped_video.mp4",
+         "demo/clean_bg_extracted/27/cropped_video.mp4",
+         "beautiful woman, cinematic lighting",
+         "Use Background Video",
+         512,
+         512,
+         "static_fg_sync_bg_visualization_fy/15_27_100fps.mp4",
+     ],
+     [
+         "demo/clean_fg_extracted/18/cropped_video.mp4",
+         "demo/clean_bg_extracted/23/cropped_video.mp4",
+         "beautiful woman, cinematic lighting",
+         "Use Background Video",
+         512,
+         512,
+         "static_fg_sync_bg_visualization_fy/18_23_100fps.mp4",
+     ],
+     # [
+     #     "demo/clean_fg_extracted/18/cropped_video.mp4",
+     #     "demo/clean_bg_extracted/33/cropped_video.mp4",
+     #     "beautiful woman, cinematic lighting",
+     #     "Use Background Video",
+     #     512,
+     #     512,
+     #     "static_fg_sync_bg_visualization_fy/18_33_100fps.mp4",
+     # ],
+     [
+         "demo/clean_fg_extracted/22/cropped_video.mp4",
+         "demo/clean_bg_extracted/39/cropped_video.mp4",
+         "beautiful woman, cinematic lighting",
+         "Use Background Video",
+         512,
+         512,
+         "static_fg_sync_bg_visualization_fy/22_39_100fps.mp4",
+     ],
+     # [
+     #     "demo/clean_fg_extracted/22/cropped_video.mp4",
+     #     "demo/clean_bg_extracted/59/cropped_video.mp4",
+     #     "beautiful woman, cinematic lighting",
+     #     "Use Background Video",
+     #     512,
+     #     512,
+     #     "static_fg_sync_bg_visualization_fy/22_59_100fps.mp4",
+     # ],
+     [
+         "demo/clean_fg_extracted/9/cropped_video.mp4",
+         "demo/clean_bg_extracted/8/cropped_video.mp4",
+         "beautiful woman, cinematic lighting",
+         "Use Background Video",
+         512,
+         512,
+         "static_fg_sync_bg_visualization_fy/9_8_100fps.mp4",
+     ],
+     [
+         "demo/clean_fg_extracted/9/cropped_video.mp4",
+         "demo/clean_bg_extracted/9/cropped_video.mp4",
+         "beautiful woman, cinematic lighting",
+         "Use Background Video",
+         512,
+         512,
+         "static_fg_sync_bg_visualization_fy/9_9_100fps.mp4",
+     ],
+     [
+         "demo/clean_fg_extracted/9/cropped_video.mp4",
+         "demo/clean_bg_extracted/10/cropped_video.mp4",
+         "beautiful woman, cinematic lighting",
+         "Use Background Video",
+         512,
+         512,
+         "static_fg_sync_bg_visualization_fy/9_10_100fps.mp4",
+     ],
+     # [
+     #     "demo/clean_fg_extracted/9/cropped_video.mp4",
+     #     "demo/clean_bg_extracted/14/cropped_video.mp4",
+     #     "beautiful woman, cinematic lighting",
+     #     "Use Background Video",
+     #     512,
+     #     512,
+     #     "static_fg_sync_bg_visualization_fy/9_14_100fps.mp4",
+     # ],
+ ]
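Since `db_examples_bf.py` references dozens of on-disk assets, a quick existence check before launch catches missing demo files early. A small editorial sketch (not part of the commit), assuming it is run from the repository root:

# Editorial sanity check: verify that every asset referenced by the example
# tables actually exists before launching the demo.
import os
import db_examples_bf as ex

paths = list(ex.bg_samples) + list(ex.fg_samples)
for row in ex.background_conditioned_examples:
    paths += [p for p in row if isinstance(p, str) and "/" in p]

missing = [p for p in paths if not os.path.exists(p)]
print("missing assets:", missing if missing else "none")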
demo/clean_bg_extracted/47/cropped_video.mp4 ADDED
Binary file (109 kB).

demo/clean_bg_extracted/57/cropped_video.mp4 ADDED
Binary file (58.6 kB).

demo/clean_bg_extracted/58/cropped_video.mp4 ADDED
Binary file (695 kB).

demo/clean_bg_extracted/62/cropped_video.mp4 ADDED
Binary file (65.8 kB).

demo/clean_fg_extracted/0/cropped_video.mp4 ADDED
Binary file (36.4 kB).
demo/clean_fg_extracted/0/frames/0000.png ADDED
demo/clean_fg_extracted/14/frames/0000.png ADDED
demo/clean_fg_extracted/14/frames/0000_rmbg.png ADDED
demo/clean_fg_extracted/15/frames/0000.png ADDED
demo/clean_fg_extracted/15/frames/0000_rmbg.png ADDED
demo/clean_fg_extracted/18/frames/0000.png ADDED
demo/clean_fg_extracted/18/frames/0000_rmbg.png ADDED
demo/clean_fg_extracted/22/frames/0000.png ADDED
demo/clean_fg_extracted/22/frames/0000_rmbg.png ADDED
demo/clean_fg_extracted/9/frames/0000.png ADDED
demo/clean_fg_extracted/9/frames/0000_rmbg.png ADDED
demo_utils1.py CHANGED
@@ -1,5 +1,11 @@
  import os
 
+ os.environ['http_proxy'] = 'http://fangye:7f0O3VtHw988kaDjsNRLzCrbwyviPctEdhszurWikiPX3KO9xZx5fTBEgpCv@10.1.20.50:23128/'
+ os.environ['https_proxy'] = 'http://fangye:7f0O3VtHw988kaDjsNRLzCrbwyviPctEdhszurWikiPX3KO9xZx5fTBEgpCv@10.1.20.50:23128/'
+ os.environ['HTTP_PROXY'] = 'http://fangye:7f0O3VtHw988kaDjsNRLzCrbwyviPctEdhszurWikiPX3KO9xZx5fTBEgpCv@10.1.20.50:23128/'
+ os.environ['HTTPS_PROXY'] = 'http://fangye:7f0O3VtHw988kaDjsNRLzCrbwyviPctEdhszurWikiPX3KO9xZx5fTBEgpCv@10.1.20.50:23128/'
+
+
  # change the global temporary directory
  new_tmp_dir = "./demo/gradio_bg"
  os.makedirs(new_tmp_dir, exist_ok=True)
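The hunk above hardcodes authenticated proxy URLs into `demo_utils1.py`. An illustrative alternative (not part of the commit) reads the proxy from the environment instead; `RELIGHTVID_PROXY` is a hypothetical variable name, not something the repo defines:

# Editorial sketch: pick up proxy settings from the environment rather than
# committing credentials into the source tree.
import os

proxy = os.environ.get("RELIGHTVID_PROXY")  # hypothetical variable name
if proxy:
    for var in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY"):
        os.environ[var] = proxy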
misc_utils/__pycache__/train_utils.cpython-310.pyc CHANGED
Binary files a/misc_utils/__pycache__/train_utils.cpython-310.pyc and b/misc_utils/__pycache__/train_utils.cpython-310.pyc differ
 
modules/openclip/__pycache__/modules.cpython-310.pyc CHANGED
Binary files a/modules/openclip/__pycache__/modules.cpython-310.pyc and b/modules/openclip/__pycache__/modules.cpython-310.pyc differ
 
pl_trainer/__pycache__/instruct_p2p_video.cpython-310.pyc CHANGED
Binary files a/pl_trainer/__pycache__/instruct_p2p_video.cpython-310.pyc and b/pl_trainer/__pycache__/instruct_p2p_video.cpython-310.pyc differ
 
pl_trainer/inference/__pycache__/inference.cpython-310.pyc CHANGED
Binary files a/pl_trainer/inference/__pycache__/inference.cpython-310.pyc and b/pl_trainer/inference/__pycache__/inference.cpython-310.pyc differ
 
static_fg_sync_bg_visualization_fy/14_22_100fps.png ADDED
static_fg_sync_bg_visualization_fy/14_55_100fps.png ADDED
static_fg_sync_bg_visualization_fy/15_27_100fps.png ADDED
static_fg_sync_bg_visualization_fy/18_23_100fps.png ADDED
static_fg_sync_bg_visualization_fy/18_33_100fps.png ADDED
static_fg_sync_bg_visualization_fy/22_39_100fps.png ADDED
static_fg_sync_bg_visualization_fy/22_59_100fps.png ADDED
static_fg_sync_bg_visualization_fy/9_10_100fps.png ADDED
static_fg_sync_bg_visualization_fy/9_14_100fps.png ADDED
static_fg_sync_bg_visualization_fy/9_8_100fps.png ADDED
static_fg_sync_bg_visualization_fy/9_9_100fps.png ADDED
tmp.py ADDED
@@ -0,0 +1,483 @@
+ import os
+ import gradio as gr
+ import numpy as np
+ from enum import Enum
+ import db_examples
+ import cv2
+
+ from demo_utils1 import *
+
+ from misc_utils.train_utils import unit_test_create_model
+ from misc_utils.image_utils import save_tensor_to_gif, save_tensor_to_images
+ from PIL import Image
+ import torch
+ import torchvision
+ from torchvision import transforms
+ from einops import rearrange
+ import imageio
+ import time
+
+ from torchvision.transforms import functional as F
+ from torch.hub import download_url_to_file
+
+ # inference setup
+ from pl_trainer.inference.inference import InferenceIP2PVideo
+ from tqdm import tqdm
+
+
+ # if not os.path.exists(filename):
+ #     original_path = os.getcwd()
+ #     base_path = './models'
+ #     os.makedirs(base_path, exist_ok=True)
+
+ #     # token written directly in the code (note the security risk)
+ #     GIT_TOKEN = "955b8ea91095840b76fe38b90a088c200d4c813c"
+ #     repo_url = f"https://YeFang:{GIT_TOKEN}@code.openxlab.org.cn/YeFang/RIV_models.git"
+
+ #     try:
+ #         if os.system(f'git clone {repo_url} {base_path}') != 0:
+ #             raise RuntimeError("git clone failed")
+ #         os.chdir(base_path)
+ #         if os.system('git lfs pull') != 0:
+ #             raise RuntimeError("git lfs pull failed")
+ #     finally:
+ #         os.chdir(original_path)
+
+ def tensor_to_pil_image(x):
+     """
+     Convert a 4D PyTorch tensor into a PIL image.
+     """
+     x = x.float()  # ensure the tensor is float
+     grid_img = torchvision.utils.make_grid(x, nrow=4).permute(1, 2, 0).detach().cpu().numpy()
+     grid_img = (grid_img * 255).clip(0, 255).astype("uint8")  # map [0, 1] to [0, 255]
+     return Image.fromarray(grid_img)
+
+ def frame_to_batch(x):
+     """
+     Fold the frame dimension into the batch dimension.
+     """
+     return rearrange(x, 'b f c h w -> (b f) c h w')
+
+ def clip_image(x, min=0., max=1.):
+     """
+     Clamp an image tensor to the given minimum and maximum values.
+     """
+     return torch.clamp(x, min=min, max=max)
+
+ def unnormalize(x):
+     """
+     Map a tensor from [-1, 1] to [0, 1].
+     """
+     return (x + 1) / 2
+
+
+ # read image files
+ def read_images_from_directory(directory, num_frames=16):
+     images = []
+     for i in range(num_frames):
+         img_path = os.path.join(directory, f'{i:04d}.png')
+         img = imageio.imread(img_path)
+         images.append(torch.tensor(img).permute(2, 0, 1))  # convert to Tensor (C, H, W)
+     return images
+
+ def load_and_process_images(folder_path):
+     """
+     Read every image in the folder, convert each to a tensor in [-1, 1],
+     and return them stacked as a 4D tensor.
+     """
+     processed_images = []
+     transform = transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Lambda(lambda x: x * 2 - 1)  # map [0, 1] to [-1, 1]
+     ])
+     for filename in sorted(os.listdir(folder_path)):
+         if filename.endswith(".png"):
+             img_path = os.path.join(folder_path, filename)
+             image = Image.open(img_path).convert("RGB")
+             processed_image = transform(image)
+             processed_images.append(processed_image)
+     return torch.stack(processed_images)  # 4D tensor
+
+ def load_and_process_video(video_path, num_frames=16, crop_size=512):
+     """
+     Read the first num_frames frames of the video, convert each frame to a
+     tensor in [-1, 1], center-crop to crop_size x crop_size, and return a 4D tensor.
+     """
+     processed_frames = []
+     transform = transforms.Compose([
+         transforms.CenterCrop(crop_size),  # center crop
+         transforms.ToTensor(),
+         transforms.Lambda(lambda x: x * 2 - 1)  # map [0, 1] to [-1, 1]
+     ])
+
+     # read the video with OpenCV
+     cap = cv2.VideoCapture(video_path)
+
+     if not cap.isOpened():
+         raise ValueError(f"cannot open video file: {video_path}")
+
+     frame_count = 0
+
+     while frame_count < num_frames:
+         ret, frame = cap.read()
+         if not ret:
+             break  # no more frames to read
+
+         # convert to RGB
+         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+         image = Image.fromarray(frame)
+
+         # apply the transforms
+         processed_frame = transform(image)
+         processed_frames.append(processed_frame)
+
+         frame_count += 1
+
+     cap.release()  # release the video handle
+
+     if len(processed_frames) < num_frames:
+         raise ValueError(f"video shorter than {num_frames} frames; only found {len(processed_frames)}.")
+
+     return torch.stack(processed_frames)  # 4D tensor (frames, channels, height, width)
+
+
+ def clear_cache(output_path):
+     if os.path.exists(output_path):
+         os.remove(output_path)
+     return None
+
+
+ #! load the model
+ # configure paths and load the model
+ config_path = 'configs/instruct_v2v_ic_gradio.yaml'
+ diffusion_model = unit_test_create_model(config_path)
+ diffusion_model = diffusion_model.to('cuda')
+
+ # load the model checkpoint
+ # ckpt_path = 'models/relvid_mm_sd15_fbc_unet.pth' #! change
+ # ckpt_path = 'tmp/pytorch_model.bin'
+ # download the checkpoint
+
+ os.makedirs('models', exist_ok=True)
+ model_path = "models/relvid_mm_sd15_fbc_unet.pth"
+
+ if not os.path.exists(model_path):
+     download_url_to_file(url='https://huggingface.co/aleafy/RelightVid/resolve/main/relvid_mm_sd15_fbc_unet.pth', dst=model_path)
+
+
+ ckpt = torch.load(model_path, map_location='cpu')
+ diffusion_model.load_state_dict(ckpt, strict=False)
+
+
+ # change the global temporary directory
+ new_tmp_dir = "./demo/gradio_bg"
+ os.makedirs(new_tmp_dir, exist_ok=True)
+
+
+ def save_video_from_frames(image_pred, save_pth, fps=8):
+     """
+     Save the frames in image_pred as a video file.
+
+     Args:
+     - image_pred: tensor of shape (1, 16, 3, 512, 512)
+     - save_pth: output video path, e.g. "output_video.mp4"
+     - fps: frame rate of the video
+     """
+     # video parameters
+     num_frames = image_pred.shape[1]
+     frame_height, frame_width = 512, 512  # target size
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # mp4 codec
+
+     # create the VideoWriter
+     out = cv2.VideoWriter(save_pth, fourcc, fps, (frame_width, frame_height))
+
+     for i in range(num_frames):
+         # unnormalize and map to the 0-255 range
+         pred_frame = clip_image(unnormalize(image_pred[0][i].unsqueeze(0))) * 255
+         pred_frame_resized = pred_frame.squeeze(0).detach().cpu()  # (3, 512, 512)
+         pred_frame_resized = pred_frame_resized.permute(1, 2, 0).numpy().astype("uint8")  # (512, 512, 3)
+
+         # resize to the target frame size
+         pred_frame_resized = cv2.resize(pred_frame_resized, (frame_width, frame_height))
+
+         # convert RGB to BGR (OpenCV expects BGR)
+         pred_frame_bgr = cv2.cvtColor(pred_frame_resized, cv2.COLOR_RGB2BGR)
+
+         # write the frame
+         out.write(pred_frame_bgr)
+
+     # release the VideoWriter
+     out.release()
+     print(f"video saved to {save_pth}")
+
+
+ inf_pipe = InferenceIP2PVideo(
+     diffusion_model.unet,
+     scheduler='ddpm',
+     num_ddim_steps=20
+ )
+
+
+ def process_example(*args):
+     v_index = args[0]
+     select_e = db_examples.background_conditioned_examples[int(v_index) - 1]
+     input_fg_path = select_e[1]
+     input_bg_path = select_e[2]
+     result_video_path = select_e[-1]
+
+     input_fg = input_fg_path.replace("frames/0000.png", "cropped_video.mp4")
+     input_bg = input_bg_path.replace("frames/0000.png", "cropped_video.mp4")
+     result_video = result_video_path.replace(".png", ".mp4")
+
+     return input_fg, input_bg, result_video
+
+
+ # inference entry point (the name is a leftover from an earlier blank-video placeholder)
+ def dummy_process(input_fg, input_bg, prompt):
+     diffusion_model.to(torch.float16)
+     fg_tensor = load_and_process_video(input_fg).cuda().unsqueeze(0).to(dtype=torch.float16)
+     bg_tensor = load_and_process_video(input_bg).cuda().unsqueeze(0).to(dtype=torch.float16)
+
+     cond_fg_tensor = diffusion_model.encode_image_to_latent(fg_tensor)  # (1, 16, 4, 64, 64)
+     cond_bg_tensor = diffusion_model.encode_image_to_latent(bg_tensor)
+     cond_tensor = torch.cat((cond_fg_tensor, cond_bg_tensor), dim=2)
+
+     # initialize the latent
+     init_latent = torch.randn_like(cond_fg_tensor)
+
+     # EDIT_PROMPT = 'change the background'
+     EDIT_PROMPT = prompt
+     VIDEO_CFG = 1.2
+     TEXT_CFG = 7.5
+     text_cond = diffusion_model.encode_text([EDIT_PROMPT])  # (1, 77, 768)
+     text_uncond = diffusion_model.encode_text([''])
+     # cast everything to float16
+     print('------------to float 16----------------')
+     init_latent, text_cond, text_uncond, cond_tensor = (
+         init_latent.to(dtype=torch.float16),
+         text_cond.to(dtype=torch.float16),
+         text_uncond.to(dtype=torch.float16),
+         cond_tensor.to(dtype=torch.float16)
+     )
+     inf_pipe.unet.to(torch.float16)
+     latent_pred = inf_pipe(
+         latent=init_latent,
+         text_cond=text_cond,
+         text_uncond=text_uncond,
+         img_cond=cond_tensor,
+         text_cfg=TEXT_CFG,
+         img_cfg=VIDEO_CFG,
+     )['latent']
+
+     image_pred = diffusion_model.decode_latent_to_image(latent_pred)  # (1, 16, 3, 512, 512)
+     output_path = os.path.join(new_tmp_dir, f"output_{int(time.time())}.mp4")
+
+     save_video_from_frames(image_pred, output_path)
+     torch.cuda.empty_cache()
+
+     return output_path
+
+ # enum for background source selection
+ class BGSource(Enum):
+     UPLOAD = "Use Background Video"
+     UPLOAD_FLIP = "Use Flipped Background Video"
+     UPLOAD_REVERSE = "Use Reversed Background Video"
+
+
+ # quick prompt examples
+ # quick_prompts = [
+ #     'beautiful woman, fantasy setting',
+ #     'beautiful woman, neon dynamic lighting',
+ #     'man in suit, tunnel lighting',
+ #     'animated mouse, aesthetic lighting',
+ #     'robot warrior, a sunset background',
+ #     'yellow cat, reflective wet beach',
+ #     'camera, dock, calm sunset',
+ #     'astronaut, dim lighting',
+ #     'astronaut, colorful balloons',
+ #     'astronaut, desert landscape'
+ # ]
+
+ # quick_prompts = [
+ #     'beautiful woman',
+ #     'handsome man',
+ #     'beautiful woman, cinematic lighting',
+ #     'handsome man, cinematic lighting',
+ #     'beautiful woman, natural lighting',
+ #     'handsome man, natural lighting',
+ #     'beautiful woman, neo punk lighting, cyberpunk',
+ #     'handsome man, neo punk lighting, cyberpunk',
+ # ]
+
+
+ quick_prompts = [
+     'beautiful woman',
+     'handsome man',
+     'beautiful woman, cinematic lighting',
+     'handsome man, cinematic lighting',
+     'beautiful woman, natural lighting',
+     'handsome man, natural lighting',
+     'beautiful woman, warm lighting',
+     'handsome man, soft lighting',
+     'change the background lighting',
+ ]
+
+
+ quick_prompts = [[x] for x in quick_prompts]
+
+ # css = """
+ # #foreground-gallery {
+ #     width: 700 !important;        /* cap the width */
+ #     max-width: 700px !important;  /* keep it from growing automatically */
+ #     flex: none !important;        /* prevent automatic expansion */
+ # }
+ # """
+
+ # Gradio UI layout
+ block = gr.Blocks().queue()
+ with block:
+     with gr.Row():
+         # gr.Markdown("## RelightVid (Relighting with Foreground and Background Video Condition)")
+         gr.Markdown("# 💡RelightVid \n### Relighting with Foreground and Background Video Condition")
+
+     with gr.Row():
+         with gr.Column():
+             with gr.Row():
+                 input_fg = gr.Video(label="Foreground Video", height=380, width=420, visible=True)
+                 input_bg = gr.Video(label="Background Video", height=380, width=420, visible=True)
+
+             segment_button = gr.Button(value="Video Segmentation")
+             with gr.Accordion("Segmentation Options", open=False):
+                 # if the user supplies coordinates directly instead of a point prompt, use x, y
+                 with gr.Row():
+                     x_coord = gr.Slider(label="X Coordinate (Point Prompt Ratio)", minimum=0.0, maximum=1.0, value=0.5, step=0.01)
+                     y_coord = gr.Slider(label="Y Coordinate (Point Prompt Ratio)", minimum=0.0, maximum=1.0, value=0.5, step=0.01)
+
+             fg_gallery = gr.Gallery(height=150, object_fit='contain', label='Foreground Quick List', value=db_examples.fg_samples, columns=5, allow_preview=False)
+             bg_gallery = gr.Gallery(height=450, object_fit='contain', label='Background Quick List', value=db_examples.bg_samples, columns=5, allow_preview=False)
+
+             with gr.Group():
+                 # with gr.Row():
+                 #     num_samples = gr.Slider(label="Videos", minimum=1, maximum=12, value=1, step=1)
+                 #     seed = gr.Number(label="Seed", value=12345, precision=0)
+                 with gr.Row():
+                     video_width = gr.Slider(label="Video Width", minimum=256, maximum=1024, value=512, step=64, visible=False)
+                     video_height = gr.Slider(label="Video Height", minimum=256, maximum=1024, value=512, step=64, visible=False)
+
+             # with gr.Accordion("Advanced options", open=False):
+             #     steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
+             #     cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=7.0, step=0.01)
+             #     highres_scale = gr.Slider(label="Highres Scale", minimum=1.0, maximum=3.0, value=1.5, step=0.01)
+             #     highres_denoise = gr.Slider(label="Highres Denoise", minimum=0.1, maximum=0.9, value=0.5, step=0.01)
+             #     a_prompt = gr.Textbox(label="Added Prompt", value='best quality')
+             #     n_prompt = gr.Textbox(label="Negative Prompt", value='lowres, bad anatomy, bad hands, cropped, worst quality')
+             # normal_button = gr.Button(value="Compute Normal (4x Slower)")
+
+         with gr.Column():
+             result_video = gr.Video(label='Output Video', height=700, width=700, visible=True)
+
+             prompt = gr.Textbox(label="Prompt")
+             bg_source = gr.Radio(choices=[e.value for e in BGSource],
+                                  value=BGSource.UPLOAD.value,
+                                  label="Background Source", type='value')
+
+             example_prompts = gr.Dataset(samples=quick_prompts, label='Prompt Quick List', components=[prompt])
+             relight_button = gr.Button(value="Relight")
+             # fg_gallery = gr.Gallery(width=400, object_fit='contain', label='Foreground Quick List', value=db_examples.bg_samples, columns=4, allow_preview=False)
+             # fg_gallery = gr.Gallery(
+             #     height=380,
+             #     object_fit='contain',
+             #     label='Foreground Quick List',
+             #     value=db_examples.fg_samples,
+             #     columns=4,
+             #     allow_preview=False,
+             #     elem_id="foreground-gallery"  # set elem_id for the css above
+             # )
+
+     # input list
+     # ips = [input_fg, input_bg, prompt, video_width, video_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, bg_source]
+     ips = [input_fg, input_bg, prompt]
+
+     # bind the button to the processing function
+     # relight_button.click(fn=lambda: None, inputs=[], outputs=[result_video])
+
+     relight_button.click(fn=dummy_process, inputs=ips, outputs=[result_video])
+
+     # normal_button.click(fn=dummy_process, inputs=ips, outputs=[result_video])
+
+     # background gallery selection
+     def bg_gallery_selected(gal, evt: gr.SelectData):
+         img_path = db_examples.bg_samples[evt.index]
+         video_path = img_path.replace('frames/0000.png', 'cropped_video.mp4')
+         return video_path
+
+     bg_gallery.select(bg_gallery_selected, inputs=bg_gallery, outputs=input_bg)
+
+     def fg_gallery_selected(gal, evt: gr.SelectData):
+         img_path = db_examples.fg_samples[evt.index]
+         video_path = img_path.replace('frames/0000.png', 'cropped_video.mp4')
+         return video_path
+
+     fg_gallery.select(fg_gallery_selected, inputs=fg_gallery, outputs=input_fg)
+
+     input_fg_img = gr.Image(label="Foreground Video", visible=False)
+     input_bg_img = gr.Image(label="Background Video", visible=False)
+     result_video_img = gr.Image(label="Output Video", visible=False)
+
+     v_index = gr.Textbox(label="ID", visible=False)
+     example_prompts.click(lambda x: x[0], inputs=example_prompts, outputs=prompt, show_progress=False, queue=False)
+
+     # examples
+     # dummy_video_for_outputs = gr.Video(visible=False, label='Result')
+     gr.Examples(
+         # fn=lambda *args: args[-1],
+         fn=process_example,
+         examples=db_examples.background_conditioned_examples,
+         # inputs=[v_index, input_fg_img, input_bg_img, prompt, bg_source, video_width, video_height, result_video_img],
+         inputs=[v_index, input_fg_img, input_bg_img, prompt, bg_source, result_video_img],
+         outputs=[input_fg, input_bg, result_video],
+         run_on_click=True, examples_per_page=1024
+     )
+
+ # launch the Gradio app
+ # block.launch(server_name='0.0.0.0', server_port=10002, share=True)
+ block.launch(share=True)
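For a quick smoke test without opening the UI, the same pipeline can be driven directly. A minimal editorial sketch (not part of tmp.py), assuming the demo assets and downloaded weights above are in place and the `block.launch(...)` call is skipped:

# Editorial smoke test: call the inference entry point on one of the bundled demo pairs.
fg = "demo/clean_fg_extracted/14/cropped_video.mp4"
bg = "demo/clean_bg_extracted/22/cropped_video.mp4"
out = dummy_process(fg, bg, "beautiful woman, cinematic lighting")
print("relit video written to", out)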