diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..19f846cfea6e8d0869d82423a206db6e52aaa6ff 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..9d3c3334f242922702f331f24912c9b1c485c803
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+batch*
+phoe*
+*gt*
+*__pycache__*
+*ocr*
+*.pyc
+*.pyo
+*.pyd
+*.txt
+*.json
+.gradio
+output
+gradio_cache
+__pycache__
\ No newline at end of file
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 0000000000000000000000000000000000000000..4001501a41fb43ff8656366b650fe671db566a31
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,29 @@
+# Installation
+
+Download the VisualCloze repository from GitHub:
+
+```bash
+git clone xxx
+```
+
+### 1. Create a conda environment and install PyTorch
+
+Note: You may want to adjust the CUDA version [according to your driver version](https://docs.nvidia.com/deploy/cuda-compatibility/#default-to-minor-version).
+
+```bash
+conda create -n visualcloze -y
+conda activate visualcloze
+conda install python=3.11 pytorch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 pytorch-cuda=12.1 -c pytorch -c nvidia -y
+```
+
+### 2. Install dependencies
+
+```bash
+pip install -r requirements.txt
+```
+
+### 3. Install flash-attn
+
+```bash
+pip install flash-attn --no-build-isolation
+```
\ No newline at end of file
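+
+Optionally, run a quick sanity check of the environment (not an official step; the printed versions will vary with your setup):
+
+```bash
+python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
+python -c "import flash_attn; print(flash_attn.__version__)"
+```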
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bde553d2a40175727e0489b7571b39725a31a80
--- /dev/null
+++ b/app.py
@@ -0,0 +1,491 @@
+import argparse
+import gradio as gr
+import demo_tasks
+from functools import partial
+from data.prefix_instruction import get_layout_instruction
+from visualcloze import VisualClozeModel
+
+
+max_grid_h = 5
+max_grid_w = 5
+default_grid_h = 2
+default_grid_w = 3
+default_upsampling_noise = 0.4
+default_steps = 30
+
+GUIDANCE = """
+
+## 📋 Quick Start Guide:
+1. Adjust **Number of In-context Examples**; setting it to 0 disables in-context learning.
+2. Set **Task Columns**, the number of images involved in a task.
+3. Upload images. For in-context examples, upload all images. For the current query, upload every image except the target.
+4. Click **Generate** to create the images.
+5. Parameters can be fine-tuned under **Advanced Options**.
+
+
+Click a task button at the bottom right to load examples of various tasks.
+
+"""
+
+CITATION = r"""
+If you find VisualCloze helpful, please consider starring ⭐ the GitHub repo. Thanks!
+---
+📝 **Citation**
+
+If our work is useful for your research, please consider citing:
+```bibtex
+@article{li2025visualcloze,
+ title={VisualCloze: A Universal Image Generation Framework via Visual In-Context Learning},
+ author={Li, Zhong-Yu and Du, Ruoyi and Yan, Juncheng and Zhuo, Le and Li, Zhen and Gao, Peng and Ma, Zhanyu and Cheng, Ming-Ming},
+ journal={arXiv preprint arXiv:},
+ year={2025}
+}
+```
+📋 **License**
+
+This project is licensed under xxx.
+"""
+
+INTRODUCTION = """
+## 📋 Introduction:
+VisualCloze utilizes in-context examples as visual demonstrations to clarify the desired task.
+
+Through in-context learning, VisualCloze can:
+1. support various in-domain tasks,
+2. generalize to **unseen tasks** through in-context learning,
+3. unify multiple tasks into one step and generate not only the target image but also the intermediate results,
+4. support reverse generation, i.e., reverse-engineering a set of conditions from a target.
+
+"""
+
+def create_demo(model):
+ with gr.Blocks(title="VisualCloze Demo") as demo:
+ gr.Markdown("# VisualCloze: A Universal Image Generation Framework via Visual In-Context Learning")
+
+ gr.HTML("""
+
+ """)
+
+ with gr.Row():
+ with gr.Column(scale=2):
+ gr.Markdown(INTRODUCTION)
+ with gr.Column(scale=2):
+ gr.Markdown(GUIDANCE)
+
+
+ # Pre-create all possible image components
+ all_image_inputs = []
+ rows = []
+ row_texts = []
+ with gr.Row():
+
+            # Left column: image grid and prompt inputs
+ with gr.Column(scale=2):
+                # Image grid section
+ for i in range(max_grid_h):
+ # Add row label before each row
+                    row_texts.append(gr.Markdown(
+                        ("Query" if i == default_grid_h - 1 else f"In-context Example {i + 1}"),
+                        elem_id=f"row_text_{i}",
+                        visible=i < default_grid_h
+                    ))
+ with gr.Row(visible=i < default_grid_h, elem_id=f"row_{i}") as row:
+ rows.append(row)
+ for j in range(max_grid_w):
+ img_input = gr.Image(
+ label=f"In-context Example {i + 1}/{j + 1}" if i != default_grid_h - 1 else f"Query {j + 1}",
+ type="pil",
+                                visible=i < default_grid_h and j < default_grid_w,
+ interactive=True,
+ elem_id=f"img_{i}_{j}"
+ )
+ all_image_inputs.append(img_input)
+
+                # Prompt input section
+ layout_prompt = gr.Textbox(
+ label="Layout Description (Auto-filled, Read-only)",
+ placeholder="Layout description will be automatically filled based on grid size...",
+ value=get_layout_instruction(default_grid_w, default_grid_h),
+ elem_id="layout_prompt",
+ interactive=False
+ )
+
+ task_prompt = gr.Textbox(
+ label="Task Description (Can be modified by referring to examples to perform custom tasks, but may lead to unstable results)",
+ placeholder="Describe what task should be performed...",
+ value="",
+ elem_id="task_prompt"
+ )
+
+ content_prompt = gr.Textbox(
+ label="Content Description (Image caption, Editing instructions, etc.)",
+ placeholder="Describe the content requirements...",
+ value="",
+ elem_id="content_prompt"
+ )
+
+ generate_btn = gr.Button("Generate", elem_id="generate_btn")
+
+ grid_h = gr.Slider(minimum=0, maximum=max_grid_h-1, value=default_grid_h-1, step=1, label="Number of In-context Examples", elem_id="grid_h")
+ grid_w = gr.Slider(minimum=1, maximum=max_grid_w, value=default_grid_w, step=1, label="Task Columns", elem_id="grid_w")
+
+ with gr.Accordion("Advanced options", open=False):
+ seed = gr.Number(label="Seed (0 for random)", value=0, precision=0)
+ steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=default_steps, step=1)
+ cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=50.0, value=30, step=1)
+ upsampling_steps = gr.Slider(label="Upsampling steps (SDEdit)", minimum=1, maximum=100.0, value=10, step=1)
+ upsampling_noise = gr.Slider(label="Upsampling noise (SDEdit)", minimum=0, maximum=1.0, value=default_upsampling_noise, step=0.01)
+
+ gr.Markdown(CITATION)
+
+            # Right column: output images
+ with gr.Column(scale=2):
+ output_gallery = gr.Gallery(
+ label="Generated Results",
+ show_label=True,
+ elem_id="output_gallery",
+                    columns=None,  # None lets the gallery adjust automatically
+                    rows=None,  # None lets the gallery adjust automatically
+ height="auto",
+ allow_preview=True,
+                    object_fit="contain"  # ensure images are displayed in full
+ )
+
+ gr.Markdown("# Task Examples")
+ text_dense_prediction_tasks = gr.Textbox(label="Task", visible=False)
+ dense_prediction_tasks = gr.Dataset(
+ samples=demo_tasks.dense_prediction_text,
+ label='Dense Prediction',
+ samples_per_page=1000,
+ components=[text_dense_prediction_tasks])
+
+ text_conditional_generation_tasks = gr.Textbox(label="Task", visible=False)
+ conditional_generation_tasks = gr.Dataset(
+ samples=demo_tasks.conditional_generation_text,
+ label='Conditional Generation',
+ samples_per_page=1000,
+ components=[text_conditional_generation_tasks])
+
+ text_image_restoration_tasks = gr.Textbox(label="Task", visible=False)
+ image_restoration_tasks = gr.Dataset(
+ samples=demo_tasks.image_restoration_text,
+ label='Image Restoration',
+ samples_per_page=1000,
+ components=[text_image_restoration_tasks])
+
+ text_style_transfer_tasks = gr.Textbox(label="Task", visible=False)
+ style_transfer_tasks = gr.Dataset(
+ samples=demo_tasks.style_transfer_text,
+ label='Style Transfer',
+ samples_per_page=1000,
+ components=[text_style_transfer_tasks])
+
+ text_style_condition_fusion_tasks = gr.Textbox(label="Task", visible=False)
+ style_condition_fusion_tasks = gr.Dataset(
+ samples=demo_tasks.style_condition_fusion_text,
+ label='Style Condition Fusion',
+ samples_per_page=1000,
+ components=[text_style_condition_fusion_tasks])
+
+ text_tryon_tasks = gr.Textbox(label="Task", visible=False)
+ tryon_tasks = gr.Dataset(
+ samples=demo_tasks.tryon_text,
+ label='Virtual Try-On',
+ samples_per_page=1000,
+ components=[text_tryon_tasks])
+
+ text_relighting_tasks = gr.Textbox(label="Task", visible=False)
+ relighting_tasks = gr.Dataset(
+ samples=demo_tasks.relighting_text,
+ label='Relighting',
+ samples_per_page=1000,
+ components=[text_relighting_tasks])
+
+ text_photodoodle_tasks = gr.Textbox(label="Task", visible=False)
+ photodoodle_tasks = gr.Dataset(
+ samples=demo_tasks.photodoodle_text,
+ label='Photodoodle',
+ samples_per_page=1000,
+ components=[text_photodoodle_tasks])
+
+ text_editing_tasks = gr.Textbox(label="Task", visible=False)
+ editing_tasks = gr.Dataset(
+ samples=demo_tasks.editing_text,
+ label='Editing',
+ samples_per_page=1000,
+ components=[text_editing_tasks])
+
+ text_unseen_tasks = gr.Textbox(label="Task", visible=False)
+ unseen_tasks = gr.Dataset(
+ samples=demo_tasks.unseen_tasks_text,
+ label='Unseen Tasks (May produce unstable effects)',
+ samples_per_page=1000,
+ components=[text_unseen_tasks])
+
+ gr.Markdown("# Subject-driven Tasks Examples")
+ text_subject_driven_tasks = gr.Textbox(label="Task", visible=False)
+ subject_driven_tasks = gr.Dataset(
+ samples=demo_tasks.subject_driven_text,
+ label='Subject-driven Generation',
+ samples_per_page=1000,
+ components=[text_subject_driven_tasks])
+
+ text_condition_subject_fusion_tasks = gr.Textbox(label="Task", visible=False)
+ condition_subject_fusion_tasks = gr.Dataset(
+ samples=demo_tasks.condition_subject_fusion_text,
+ label='Condition+Subject Fusion',
+ samples_per_page=1000,
+ components=[text_condition_subject_fusion_tasks])
+
+ text_style_transfer_with_subject_tasks = gr.Textbox(label="Task", visible=False)
+ style_transfer_with_subject_tasks = gr.Dataset(
+ samples=demo_tasks.style_transfer_with_subject_text,
+ label='Style Transfer with Subject',
+ samples_per_page=1000,
+ components=[text_style_transfer_with_subject_tasks])
+
+ text_condition_subject_style_fusion_tasks = gr.Textbox(label="Task", visible=False)
+ condition_subject_style_fusion_tasks = gr.Dataset(
+ samples=demo_tasks.condition_subject_style_fusion_text,
+ label='Condition+Subject+Style Fusion',
+ samples_per_page=1000,
+ components=[text_condition_subject_style_fusion_tasks])
+
+ text_editing_with_subject_tasks = gr.Textbox(label="Task", visible=False)
+ editing_with_subject_tasks = gr.Dataset(
+ samples=demo_tasks.editing_with_subject_text,
+ label='Editing with Subject',
+ samples_per_page=1000,
+ components=[text_editing_with_subject_tasks])
+
+ text_image_restoration_with_subject_tasks = gr.Textbox(label="Task", visible=False)
+ image_restoration_with_subject_tasks = gr.Dataset(
+ samples=demo_tasks.image_restoration_with_subject_text,
+ label='Image Restoration with Subject',
+ samples_per_page=1000,
+ components=[text_image_restoration_with_subject_tasks])
+
+ def update_grid(h, w):
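+            # The slider value counts only the in-context examples, so the
+            # actual number of grid rows includes one extra row for the query.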
+ actual_h = h + 1
+ model.set_grid_size(actual_h, w)
+
+ updates = []
+
+ # Update image component visibility
+ for i in range(max_grid_h * max_grid_w):
+ curr_row = i // max_grid_w
+ curr_col = i % max_grid_w
+ updates.append(
+ gr.update(
+ label=f"In-context Example {curr_row + 1}/{curr_col + 1}" if curr_row != actual_h - 1 else f"Query {curr_col + 1}",
+ elem_id=f"img_{curr_row}_{curr_col}",
+ visible=(curr_row < actual_h and curr_col < w)))
+
+ # Update row visibility and labels
+ updates_row = []
+ updates_row_text = []
+ for i in range(max_grid_h):
+                updates_row.append(gr.update(visible=(i < actual_h)))
+ updates_row_text.append(
+ gr.update(
+ elem_id=f"row_text_{i}",
+ visible=i < actual_h,
+                        value=("Query" if i == actual_h - 1 else f"In-context Example {i + 1}"),
+ )
+ )
+
+ updates.extend(updates_row)
+ updates.extend(updates_row_text)
+ updates.append(gr.update(elem_id="layout_prompt", value=get_layout_instruction(w, actual_h)))
+ return updates
+
+ def generate_image(*inputs):
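+            # Inputs arrive flattened: the first max_grid_h * max_grid_w entries
+            # are the grid images; the last eight are the sampler settings and prompts.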
+ images = []
+ for i in range(model.grid_h):
+ images.append([])
+ for j in range(model.grid_w):
+ images[i].append(inputs[i * max_grid_w + j])
+ seed, cfg, steps, upsampling_steps, upsampling_noise, layout_text, task_text, content_text = inputs[-8:]
+
+ results = model.process_images(
+ images,
+ [layout_text, task_text, content_text],
+ seed=seed, cfg=cfg, steps=steps,
+ upsampling_steps=upsampling_steps, upsampling_noise=upsampling_noise
+ )
+
+ output = gr.update(
+ elem_id='output_gallery',
+ value=results,
+ columns=min(len(results), 2),
+ rows=int(len(results) / 2 + 0.5))
+
+ return output
+
+ def process_tasks(task, func):
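+            # Each demo_tasks processor returns, in order: a mask marking which grid
+            # cells are generation targets, seven UI state values (grid size, prompts,
+            # and sampler settings), the input images, and the example outputs.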
+ outputs = func(task)
+ mask = outputs[0]
+ state = outputs[1:8]
+ if state[5] is None:
+ state[5] = default_upsampling_noise
+ if state[6] is None:
+ state[6] = default_steps
+ images = outputs[8:-len(mask)]
+ output = outputs[-len(mask):]
+ for i in range(len(mask)):
+ if mask[i] == 1:
+ images.append(None)
+ else:
+ images.append(output[-len(mask) + i])
+
+ state[0] = state[0] - 1
+            cur_grid_h = state[0]
+            cur_grid_w = state[1]
+
+ current_example = [None] * 25
+ for i, image in enumerate(images):
+                pos = (i // cur_grid_w) * 5 + (i % cur_grid_w)
+ if image is not None:
+ current_example[pos] = image
+            update_grid(cur_grid_h, cur_grid_w)
+ output = gr.update(
+ elem_id='output_gallery',
+ value=output,
+ columns=min(len(output), 2),
+ rows=int(len(output) / 2 + 0.5))
+ return [output] + current_example + state
+
+ dense_prediction_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_dense_prediction_tasks),
+ inputs=[dense_prediction_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ conditional_generation_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_conditional_generation_tasks),
+ inputs=[conditional_generation_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ image_restoration_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_image_restoration_tasks),
+ inputs=[image_restoration_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ style_transfer_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_style_transfer_tasks),
+ inputs=[style_transfer_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ style_condition_fusion_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_style_condition_fusion_tasks),
+ inputs=[style_condition_fusion_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ relighting_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_relighting_tasks),
+ inputs=[relighting_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ tryon_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_tryon_tasks),
+ inputs=[tryon_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ photodoodle_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_photodoodle_tasks),
+ inputs=[photodoodle_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ editing_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_editing_tasks),
+ inputs=[editing_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ unseen_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_unseen_tasks),
+ inputs=[unseen_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ subject_driven_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_subject_driven_tasks),
+ inputs=[subject_driven_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ style_transfer_with_subject_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_style_transfer_with_subject_tasks),
+ inputs=[style_transfer_with_subject_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ condition_subject_fusion_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_condition_subject_fusion_tasks),
+ inputs=[condition_subject_fusion_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ condition_subject_style_fusion_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_condition_subject_style_fusion_tasks),
+ inputs=[condition_subject_style_fusion_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ editing_with_subject_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_editing_with_subject_tasks),
+ inputs=[editing_with_subject_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+
+ image_restoration_with_subject_tasks.click(
+ partial(process_tasks, func=demo_tasks.process_image_restoration_with_subject_tasks),
+ inputs=[image_restoration_with_subject_tasks],
+ outputs=[output_gallery] + all_image_inputs + [grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps], show_progress=False, queue=False)
+ # Initialize grid
+ model.set_grid_size(default_grid_h, default_grid_w)
+
+ # Connect event processing function to all components that need updating
+ output_components = all_image_inputs + rows + row_texts + [layout_prompt]
+
+ grid_h.change(fn=update_grid, inputs=[grid_h, grid_w], outputs=output_components)
+ grid_w.change(fn=update_grid, inputs=[grid_h, grid_w], outputs=output_components)
+
+ # Modify generate button click event
+ generate_btn.click(
+ fn=generate_image,
+ inputs=all_image_inputs + [seed, cfg, steps, upsampling_steps, upsampling_noise] + [layout_prompt, task_prompt, content_prompt],
+ outputs=output_gallery
+ )
+
+ return demo
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model_path", type=str, default=None)
+ parser.add_argument("--precision", type=str, choices=["fp32", "bf16", "fp16"], default="bf16")
+ parser.add_argument("--resolution", type=int, default=384)
+ return parser.parse_args()
+
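+# Example launch (the checkpoint path is a placeholder):
+#   python app.py --model_path /path/to/checkpoint --resolution 384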
+if __name__ == "__main__":
+ args = parse_args()
+
+ # Initialize model
+ model = VisualClozeModel(resolution=args.resolution, model_path=args.model_path, precision=args.precision)
+
+ # Create Gradio demo
+ demo = create_demo(model)
+
+ # Start Gradio server
+ demo.launch(share=False, server_port=10050, server_name="0.0.0.0")
\ No newline at end of file
diff --git a/data/__init__.py b/data/__init__.py
new file mode 100755
index 0000000000000000000000000000000000000000..3dedd706b28e02566f56165ff62cf2a5529c22fa
--- /dev/null
+++ b/data/__init__.py
@@ -0,0 +1 @@
+from .prefix_instruction import *
\ No newline at end of file
diff --git a/data/prefix_instruction.py b/data/prefix_instruction.py
new file mode 100644
index 0000000000000000000000000000000000000000..6230d23dd48d2df1c948ceb3ed8cd1065950fdfe
--- /dev/null
+++ b/data/prefix_instruction.py
@@ -0,0 +1,1086 @@
+import random
+
+condition_list = ["canny", "depth", "hed", "normal", "mlsd", "openpose", "sam2_mask", "mask", "foreground", "background", "uniformer"]
+style_list = ["InstantStyle", "ReduxStyle"]
+editing_list = ["DepthEdit", "FillEdit"]
+degradation_list = [
+ # blur
+ "blur",
+ "compression",
+ "SRx2",
+ "SRx4",
+ "pixelate",
+ "Defocus",
+ "GaussianBlur",
+ # sharpen
+ "oversharpen",
+    # noise
+ "GaussianNoise",
+ "PoissonNoise",
+ "SPNoise",
+ # mosaic
+ "mosaic",
+ # contrast
+ "contrast_strengthen",
+ "contrast_weaken",
+ # quantization
+ "quantization",
+ "JPEG",
+ # light
+ "brighten",
+ "darken",
+ "LowLight",
+ # color
+ "saturate_strengthen",
+ "saturate_weaken",
+ "gray",
+ "ColorDistortion",
+ # infilling
+ "Inpainting",
+ # rotate
+ "rotate90",
+ "rotate180",
+ "rotate270",
+ # other
+ "Barrel",
+ "Pincushion",
+ "Elastic",
+    # spatial effect
+ "Rain",
+ "Frost",
+ ]
+
+
+def get_image_prompt(image_type):
+ image_prompts = {
+ "target": [
+ "a high-quality image",
+ "an aesthetically pleasing photograph",
+ "a high-resolution image",
+ "an image with vivid details",
+ "a visually striking and clear picture",
+ "a high-definition image",
+ "an image with artistic appeal",
+ "a sharp and beautifully composed photograph",
+ "a high-aesthetic image",
+ "an image with flawless clarity",
+ "a vibrant and professionally captured photo",
+ "a crystal-clear image",
+            "an image with artistic quality",
+ "a high-quality image with exceptional detail",
+ "a photo realistic image",
+ ],
+ "reference": [
+ "a reference image",
+            "an image featuring the primary object",
+ "a reference for the main object",
+ "a reference image highlighting the central object",
+ "an image containing the key object",
+ "a reference image with the main subject included",
+ "an image providing the main object",
+ "a reference image showcasing the dominant object",
+ "an image that includes the main object",
+ "a reference image capturing the primary subject",
+ "an image containing the main subject",
+ ],
+ # condition
+ "canny": [
+ "canny edge map with sharp black-and-white contours",
+ "black-and-white edge map highlighting crisp boundaries",
+ "canny result showing stark white edges on black",
+ "edge map with clean white lines on a dark background",
+ "canny output featuring precise white object outlines",
+ "black background with white edge-detected contours",
+ "canny edge map displaying clear white structural edges",
+ "white edge lines on black from canny detection",
+ "canny map with sharp white edges and dark voids",
+ "edge map revealing white outlines of object shapes",
+ ],
+ "depth": [
+ "depth map showing gray-scale object contours",
+ "gray-toned depth map with layered outlines",
+ "depth map featuring gradient-gray surfaces",
+ "gray-shaded depth map with distinct edges",
+ "depth map displaying soft gray gradients",
+ "gray-scale depth map with clear object boundaries",
+ "depth map highlighting gray-level depth variations",
+ "gray-textured depth map with smooth transitions",
+ "depth map revealing gray-toned spatial layers",
+ "gray-based depth map with detailed object contours",
+ ],
+ "hed": [
+ "hed edge map with smooth flowing contours",
+ "soft-edged map from hed detection",
+ "hed result showing refined continuous edges",
+ "edge map with natural well-connected outlines",
+ "hed output featuring smooth detailed boundaries",
+ "elegant edge map with seamless transitions",
+ "hed edge map displaying clean holistic contours",
+ "refined edge lines from hed detection",
+ "hed map with flowing natural object outlines",
+ "edge map revealing smooth interconnected shapes",
+ ],
+ "normal": [
+ "normal map showing surface orientation details",
+ "rgb-coded normal map for 3D lighting",
+ "normal map with encoded surface normals",
+ "detailed normal map for texture shading",
+ "normal map highlighting surface curvature",
+ "rgb normal map for bump mapping effects",
+ "normal map capturing fine geometric details",
+ "surface normal visualization in rgb colors",
+ "normal map for realistic lighting interaction",
+ "normal map displaying directional surface data",
+ ],
+ "mlsd": [
+ "mlsd detected straight line segments",
+ "line segments extracted using mlsd",
+ "mlsd output showing precise straight lines",
+ "straight edges detected by mlsd algorithm",
+ "mlsd result with clean line segment boundaries",
+ "line segment map generated by mlsd",
+ "mlsd-detected straight structural lines",
+ "straight line visualization from mlsd",
+ "mlsd-based line segment detection output",
+ "line segments highlighted by mlsd method",
+ ],
+ "openpose": [
+ "openpose skeleton with colorful connecting lines",
+ "body keypoints linked by bright colored lines",
+ "openpose output showing joints and vibrant skeleton",
+ "human pose with colored lines for bone structure",
+ "openpose-detected keypoints and colorful limbs",
+ "skeletal lines in vivid colors from openpose",
+ "body joints connected by multicolored straight lines",
+ "openpose visualization with colorful skeletal links",
+ "keypoints and bright lines forming body skeleton",
+ "human pose mapped with colored lines by openpose",
+ ],
+ "sam2_mask": [
+ "sam 2 generated colorful segmentation masks",
+ "color-coded masks from sam 2 segmentation",
+ "sam 2 output with vibrant object masks",
+ "segmentation masks in bright colors by sam 2",
+ "colorful object masks from sam 2 detection",
+ "sam 2 result showing multicolored regions",
+ "masks with distinct colors from sam 2",
+ "sam 2 segmentation with vivid mask overlays",
+ "colorful masks highlighting objects via sam 2",
+ "sam 2-generated masks with rich color coding",
+ ],
+ "uniformer": [
+ "color-coded objects in uniformer segmentation",
+ "uniformer map with colored object blocks",
+ "objects as distinct color patches by uniformer",
+ "color blocks representing objects in uniformer",
+ "uniformer output with colored object regions",
+ "objects highlighted as color zones in uniformer",
+ "uniformer segmentation showing color-divided objects",
+ "color patches for objects in uniformer result",
+ "uniformer map with objects as solid color areas",
+ "objects segmented as colored blocks by uniformer",
+ ],
+ "mask": [
+ "Color-coded objects in open-world segmentation",
+ "Distinct colors marking different objects",
+ "Objects highlighted as unique color patches",
+ "Color blocks representing diverse objects",
+ "Segmented image with varied color zones",
+ "Objects visualized as solid color regions",
+ "Colorful map of open-world object segmentation",
+ "Objects divided by vibrant color boundaries",
+ "Color-coded segmentation of diverse items",
+ "Objects mapped as distinct colored areas",
+ ],
+ "foreground": [
+ "Foreground on solid color canvas",
+ "Image with foreground on plain backdrop",
+ "Foreground placed on monochrome background",
+ "Objects on solid color base",
+ "Foreground isolated on uniform color",
+ "Segmented subject on plain color field",
+ "Foreground displayed on solid color",
+ "Image with foreground on solid backdrop",
+ "Foreground on a clean color canvas",
+ "Objects on a solid color background",
+ ],
+ "background": [
+ "Background-only image with foreground masked",
+ "Photo showing background after masking foreground",
+ "Image with foreground removed leaving background",
+ "Background revealed by masking the foreground",
+ "Foreground masked to expose background",
+ "Picture with background visible after masking",
+ "Image displaying background without foreground",
+ "Foreground erased leaving only background",
+ "Background isolated by masking the foreground",
+ "Photo with foreground hidden showing background",
+ ],
+ # Style
+ "style_source": [
+ "Image in a distinct artistic style",
+ "Artistically styled picture with unique flair",
+ "Photo showcasing a specific art style",
+ "Image with a clear artistic aesthetic",
+ "Art-style influenced visual composition",
+ "Picture reflecting a particular art movement",
+ "Image with bold artistic characteristics",
+ "Artistically rendered visual content",
+ "Photo with a strong artistic theme",
+ "Image embodying a defined art style",
+ ],
+ "style_target": [
+ "High-quality image with striking artistic style",
+ "Crisp photo showcasing bold artistic flair",
+ "Visually stunning image with artistic influence",
+ "High-definition picture in a unique art style",
+ "Artistically styled image with exceptional clarity",
+ "High-quality visual with distinct artistic touch",
+ "Sharp photo reflecting a clear artistic theme",
+ "Artistically crafted image with high resolution",
+ "Vibrant picture blending quality and art style",
+ "High-aesthetic image with artistic precision",
+ ],
+ # Editing
+ "DepthEdit": [
+ "a high-quality image",
+ "an aesthetically pleasing photograph",
+ "a high-resolution image",
+ "an image with vivid details",
+ "a visually striking and clear picture",
+ "a high-definition image",
+ "an image with artistic appeal",
+ "a sharp and beautifully composed photograph",
+ "a high-aesthetic image",
+ "an image with flawless clarity",
+ "a vibrant and professionally captured photo",
+ "a crystal-clear image",
+ "an image with artistic quality",
+ "a high-quality image with exceptional detail",
+ "a photo realistic image",
+ ],
+ "FillEdit": [
+ "a high-quality image",
+ "an aesthetically pleasing photograph",
+ "a high-resolution image",
+ "an image with vivid details",
+ "a visually striking and clear picture",
+ "a high-definition image",
+ "an image with artistic appeal",
+ "a sharp and beautifully composed photograph",
+ "a high-aesthetic image",
+ "an image with flawless clarity",
+ "a vibrant and professionally captured photo",
+ "a crystal-clear image",
+ "an image with artistic quality",
+ "a high-quality image with exceptional detail",
+ "a photo realistic image",
+ ],
+ # degradation
+ # Blur
+ "blur": [
+ "a softly blurred image with smooth transitions",
+ "a photograph with a gentle motion blur effect",
+ "an image exhibiting subtle Gaussian blur",
+ "a picture with a light and even blurring",
+ "a softly defocused photograph with reduced sharpness",
+ "an image featuring a mild blur for artistic effect",
+ "a photograph with a gentle out-of-focus appearance",
+ "a softly smeared image with smooth edges",
+ "a picture with a light blur enhancing the mood",
+ "an image with a delicate blur creating a dreamy effect",
+ ],
+ "compression": [
+ "a highly compressed image with noticeable artifacts",
+ "a photograph showing compression-induced quality loss",
+ "an image with visible compression artifacts and reduced clarity",
+ "a picture exhibiting blocky artifacts from compression",
+ "a compressed photo with color banding and loss of detail",
+ "an image displaying noticeable compression noise",
+ "a photograph with degraded quality due to high compression",
+ "a picture showing pixelation from aggressive compression",
+ "an image with artifacts and reduced resolution from compression",
+ "a compressed image featuring loss of sharpness and detail",
+ ],
+ "SRx2": [
+ "an image downsampled by a factor of 2 with enhanced details",
+ "a photograph resized to half its original resolution",
+            "a downscaled image (2x) maintaining image quality",
+            "a picture downsized by 2x with preserved sharpness",
+            "an image scaled to half its size with clear details",
+ "a low-resolution version of the original image (2x)",
+ "a half-resolution photograph with maintained clarity",
+ "an image decreased in size by 2x with minimal quality loss",
+ "a 2x downscaled picture retaining original details",
+ "an image resized to half its original dimensions with enhanced quality",
+ ],
+ "SRx4": [
+ "an image downsampled by a factor of 4 with enhanced details",
+ "a photograph resized to quarter its original resolution",
+            "a downscaled image (4x) maintaining image quality",
+            "a picture downsized by 4x with preserved sharpness",
+            "an image scaled to a quarter of its size with clear details",
+            "a low-resolution version of the original image (4x)",
+            "a quarter-resolution photograph with maintained clarity",
+ "an image decreased in size by 4x with minimal quality loss",
+ "a 4x downscaled picture retaining original details",
+ "an image resized to quarter its original dimensions with enhanced quality",
+ ],
+ "pixelate": [
+ "a heavily pixelated image with large blocks",
+ "a picture showing strong pixelation effects",
+ "an image with noticeable pixel blocks obscuring details",
+ "a pixelated photograph with reduced image clarity",
+ "an image exhibiting coarse pixelation for a stylized look",
+ "a picture with large pixel squares creating a mosaic effect",
+ "a highly pixelated photo obscuring fine details",
+ "an image featuring prominent pixelation and blockiness",
+ "a pixelated image with distinct square blocks",
+ "a photograph with exaggerated pixelation for artistic effect",
+ ],
+ "Defocus": [
+ "a defocused image with soft and blurry regions",
+ "a photograph with intentional defocus creating a shallow depth of field",
+ "an image exhibiting a defocused background with a clear subject",
+ "a picture with selective defocus enhancing the main object",
+ "a defocused photo with smooth out-of-focus areas",
+ "an image showing a defocused effect for artistic blurring",
+ "a photograph with a softly defocused foreground",
+ "a picture with partial defocus creating a dreamy appearance",
+ "an image featuring defocus to highlight specific areas",
+ "a defocused photograph with gentle blurring around the subject",
+ ],
+ "GaussianBlur": [
+ "an image with Gaussian blurring creating a soft focus effect",
+ "a photograph with a Gaussian blur enhancing the subject",
+ "a picture with Gaussian blurring to highlight the main object",
+ "an image featuring Gaussian blur to soften the background",
+ "a Gaussian-blurred photograph with a soft focus",
+ "a Gaussian-blurred image with a gentle focus on the subject",
+ "a picture with Gaussian blurring to emphasize the main subject",
+ "an image with Gaussian blurring to create a dreamy effect",
+ "a Gaussian-blurred photograph with a soft focus on the main object",
+ ],
+ # Sharpen
+ "oversharpen": [
+ "an image with excessive sharpening creating halos around edges",
+ "a photograph overly sharpened with exaggerated edge contrast",
+ "an oversharpened picture showing unnatural edge highlights",
+ "a highly sharpened image with pronounced texture details",
+ "a picture exhibiting over-sharpening with visible artifacts",
+ "an image with extreme sharpening enhancing all details sharply",
+ "a photograph with oversharpened edges and increased contrast",
+ "an overly sharpened image causing unnatural texture emphasis",
+ "a picture with excessive sharpening effects on all elements",
+ "an image displaying over-sharpened features with enhanced edges",
+ ],
+ # Noise
+ "GaussianNoise": [
+ "an image with subtle Gaussian noise adding grain",
+ "a photograph exhibiting Gaussian noise for a textured look",
+ "a picture with light Gaussian noise enhancing realism",
+ "an image featuring Gaussian noise with smooth distribution",
+ "a photo with added Gaussian noise creating a grainy effect",
+ "an image showing gentle Gaussian noise for artistic texture",
+ "a photograph with mild Gaussian noise increasing depth",
+ "a picture with soft Gaussian noise enhancing the image",
+ "an image displaying Gaussian noise for a vintage feel",
+ "a photo with Gaussian noise subtly integrated into the image",
+ ],
+ "PoissonNoise": [
+ "an image with Poisson noise creating photon distribution effects",
+ "a photograph exhibiting Poisson noise for realistic grain",
+ "a picture with added Poisson noise enhancing texture",
+ "an image featuring Poisson noise with natural variance",
+ "a photo with Poisson noise simulating low-light conditions",
+ "an image showing Poisson noise for authentic grain patterns",
+ "a photograph with mild Poisson noise increasing image depth",
+ "a picture with Poisson noise adding subtle texture",
+ "an image displaying Poisson noise for a realistic appearance",
+ "a photo with Poisson noise integrated for enhanced realism",
+ ],
+ "SPNoise": [
+ "an image with salt and pepper noise introducing random pixels",
+ "a photograph exhibiting SP noise with black and white speckles",
+ "a picture with added salt and pepper noise creating scattered dots",
+ "an image featuring SP noise with random pixel disruptions",
+ "a photo with SP noise simulating transmission errors",
+ "an image showing salt and pepper noise for a gritty effect",
+ "a photograph with mild SP noise adding texture variation",
+ "a picture with SP noise introducing random black and white pixels",
+ "an image displaying salt and pepper noise for a distressed look",
+ "a photo with SP noise integrated for a speckled appearance",
+ ],
+ # Mosaic
+ "mosaic": [
+ "an image with a strong mosaic effect obscuring details",
+ "a photograph exhibiting mosaic patterns with large tiles",
+ "a picture with applied mosaic effect creating a tiled appearance",
+ "an image featuring mosaic blocks for privacy masking",
+ "a photo with mosaic segmentation highlighting regions",
+ "an image showing a mosaic overlay for abstract effect",
+ "a photograph with mosaic patterns simplifying the image",
+ "a picture with a mosaic filter creating geometric tiles",
+ "an image displaying a mosaic effect for stylistic purposes",
+ "a photo with mosaic segmentation emphasizing specific areas",
+ ],
+ # Contrast
+ "contrast_strengthen": [
+ "an image with enhanced contrast making colors pop",
+ "a photograph exhibiting strengthened contrast for vividness",
+ "a picture with increased contrast highlighting details",
+ "an image featuring heightened contrast for dramatic effect",
+ "a photo with boosted contrast enhancing visual depth",
+ "an image showing strengthened contrast with pronounced shadows and highlights",
+ "a photograph with amplified contrast for greater clarity",
+ "a picture with enhanced contrast making elements stand out",
+ "an image displaying increased contrast for a striking appearance",
+ "a photo with reinforced contrast improving overall image impact",
+ ],
+ "contrast_weaken": [
+ "an image with reduced contrast creating a softer look",
+ "a photograph exhibiting weakened contrast for a muted effect",
+ "a picture with decreased contrast making colors more subtle",
+ "an image featuring lowered contrast for a gentle appearance",
+ "a photo with diminished contrast softening the overall image",
+ "an image showing weakened contrast with less pronounced shadows and highlights",
+ "a photograph with reduced contrast for a flatter visual tone",
+ "a picture with softened contrast creating a delicate atmosphere",
+ "an image displaying decreased contrast for a subdued look",
+ "a photo with lowered contrast enhancing a calm and serene feel",
+ ],
+ # Quantization
+ "quantization": [
+ "an image with quantization artifacts reducing color depth",
+ "a photograph exhibiting quantization leading to banding effects",
+ "a picture with applied quantization simplifying color gradients",
+ "an image featuring quantized color levels creating discrete steps",
+ "a photo with quantization reducing the number of distinct colors",
+ "an image showing quantization leading to posterization effects",
+ "a photograph with quantized color palette for a stylized look",
+ "a picture with quantization introducing color banding and loss of detail",
+ "an image displaying quantization effects on smooth color transitions",
+ "a photo with quantization artifacts simplifying the overall color scheme",
+ ],
+ "JPEG": [
+ "a JPEG-compressed image with noticeable compression artifacts",
+ "a photograph saved in JPEG format showing quality loss",
+ "an image exhibiting JPEG artifacts like blockiness and blurring",
+ "a picture with JPEG compression leading to reduced clarity",
+ "an image featuring JPEG-induced artifacts affecting image quality",
+ "a photo with visible JPEG compression effects on details",
+ "an image showing JPEG artifacts such as color banding and pixelation",
+ "a photograph with degraded quality due to JPEG compression",
+ "a picture with JPEG compression artifacts impacting the overall appearance",
+ "an image displaying JPEG-induced quality loss with blurred edges",
+ ],
+ # Light
+ "brighten": [
+ "a brightly lit image with enhanced luminosity",
+ "a photograph exhibiting increased brightness for a vibrant look",
+ "a picture with boosted brightness making the scene more radiant",
+ "an image featuring heightened brightness illuminating all areas",
+ "a photo with amplified brightness creating a sunny appearance",
+ "an image showing increased brightness enhancing visibility",
+ "a photograph with enhanced brightness making colors more vivid",
+ "a picture with boosted luminosity brightening the overall image",
+ "an image displaying heightened brightness for a luminous effect",
+ "a photo with increased brightness adding warmth and clarity",
+ ],
+ "darken": [
+ "a darkened image with reduced luminosity creating a moody atmosphere",
+ "a photograph exhibiting decreased brightness for a subdued look",
+ "a picture with lowered brightness making the scene more somber",
+ "an image featuring diminished brightness enhancing shadows",
+ "a photo with reduced brightness creating a twilight appearance",
+ "an image showing decreased brightness adding depth and contrast",
+ "a photograph with darkened tones making colors more muted",
+ "a picture with lowered luminosity creating a dramatic effect",
+ "an image displaying reduced brightness for a darker aesthetic",
+ "a photo with decreased brightness enhancing the mysterious mood",
+ ],
+ "LowLight": [
+ "an image with low light conditions creating a dim and shadowy appearance",
+ "a photograph exhibiting low light to simulate night-time conditions",
+ "a picture with reduced illumination to create a night-time ambiance",
+ "an image featuring low light to emphasize the subject in darkness",
+ "a photo with low light conditions creating a mysterious mood",
+ "an image showing low light to enhance the dramatic lighting of the scene",
+ "a photograph with dim lighting to create a soft and dreamy effect",
+ "a picture with low light to emphasize the texture and details of the image",
+ "an image displaying low light conditions for a serene and peaceful feel",
+ ],
+ # Color
+ "saturate_strengthen": [
+ "an image with enhanced saturation making colors more vivid",
+ "a photograph exhibiting strengthened saturation for vibrant hues",
+ "a picture with boosted color saturation enhancing visual appeal",
+ "an image featuring heightened saturation creating rich color tones",
+ "a photo with amplified saturation making colors pop",
+ "an image showing increased saturation for a lively appearance",
+ "a photograph with saturated colors enhancing the overall image",
+ "a picture with strengthened color saturation adding vibrancy",
+ "an image displaying enhanced saturation for a dynamic look",
+ "a photo with boosted color intensity making the scene more colorful",
+ ],
+ "saturate_weaken": [
+ "an image with reduced saturation creating a muted color palette",
+ "a photograph exhibiting weakened saturation for subdued tones",
+ "a picture with lowered color saturation making colors more subtle",
+ "an image featuring diminished saturation creating a pastel look",
+ "a photo with decreased saturation softening the overall colors",
+ "an image showing reduced saturation for a faded appearance",
+ "a photograph with desaturated colors enhancing a minimalist aesthetic",
+ "a picture with weakened color saturation adding a calm feel",
+ "an image displaying lowered saturation for a gentle color scheme",
+ "a photo with diminished color intensity creating a subdued look",
+ ],
+ "gray": [
+ "a grayscale image with varying shades of gray",
+ "a black and white photograph emphasizing contrast and texture",
+ "a gray-toned picture highlighting light and shadow",
+ "an image converted to grayscale showcasing structural details",
+ "a monochromatic photo with rich gray gradients",
+ "a grayscale image emphasizing form and composition",
+ "a black and white picture with balanced gray tones",
+ "an image in gray scale enhancing depth and dimension",
+ "a monochrome photograph focusing on texture and contrast",
+ "a gray-toned image presenting a classic black and white aesthetic",
+ ],
+ "ColorDistortion": [
+ "an image with distorted and surreal colors",
+ "a picture featuring unnatural color tones",
+ "a visually striking image with altered hues",
+ "a photo showcasing disrupted color balance",
+ "an image with vibrant and unexpected colors",
+ "a picture displaying shifted color spectrums",
+ "an artwork-like image with perturbed colors",
+ "a photo with dreamlike and distorted hues",
+ "an image with unconventional color variations",
+ "a visually unique picture with color shifts",
+ ],
+ # Infilling
+ "Inpainting": [
+ "an inpainted image seamlessly filling missing areas",
+ "a photograph with inpainting repairing damaged regions",
+ "a picture featuring inpainting to restore obscured parts",
+ "an image using inpainting to complete incomplete areas",
+ "a photo with inpainting blending filled regions naturally",
+ "an image showing inpainting techniques removing unwanted objects",
+ "a photograph with inpainting reconstructing missing details",
+ "a picture utilizing inpainting to enhance image continuity",
+ "an image with inpainting seamlessly integrating filled sections",
+ "a photo using inpainting to mend and complete the visual content",
+ ],
+ # Rotate
+ "rotate90": [
+ "an image rotated 90 degrees clockwise for a new perspective",
+ "a photograph turned 90 degrees to the right altering the orientation",
+ "a picture rotated a quarter turn clockwise enhancing composition",
+ "an image featuring a 90-degree rotation adjusting the viewpoint",
+ "a photo with a 90-degree clockwise rotation changing the layout",
+ "an image showing a rotated view at 90 degrees for a fresh angle",
+ "a photograph rotated right by 90 degrees for dynamic framing",
+ "a picture with a 90-degree turn clockwise modifying the scene",
+ "an image displaying a 90-degree rotated orientation for visual interest",
+ "a photo rotated ninety degrees to enhance the composition",
+ ],
+ "rotate180": [
+ "an image rotated 180 degrees flipping it upside down",
+ "a photograph turned completely around with a 180-degree rotation",
+ "a picture rotated halfway, creating an inverted perspective",
+ "an image featuring a 180-degree turn altering the original orientation",
+ "a photo with an upside-down view due to 180-degree rotation",
+ "an image showing a flipped perspective with a 180-degree rotation",
+            "a photograph rotated half a turn around, changing the viewpoint",
+ "a picture with a half-turn rotation modifying the scene layout",
+ "an image displaying a 180-degree rotated orientation for a unique angle",
+            "a photo rotated a half-circle to invert the composition",
+ ],
+ "rotate270": [
+ "an image rotated 270 degrees clockwise for a new angle",
+ "a photograph turned 270 degrees to the right altering the orientation",
+            "a picture rotated three-quarters of a turn clockwise enhancing composition",
+ "an image featuring a 270-degree rotation adjusting the viewpoint",
+ "a photo with a 270-degree clockwise rotation changing the layout",
+ "an image showing a rotated view at 270 degrees for a fresh angle",
+ "a photograph rotated right by 270 degrees for dynamic framing",
+ "a picture with a 270-degree turn clockwise modifying the scene",
+ "an image displaying a 270-degree rotated orientation for visual interest",
+            "a photo rotated three-quarters of a turn clockwise to enhance the composition",
+ ],
+ # Other
+ "Barrel": [
+ "an image with barrel distortion bending the edges outward",
+ "a photograph exhibiting barrel distortion creating a convex effect",
+ "a picture with barrel distortion warping the image edges",
+ "an image featuring barrel distortion causing peripheral stretching",
+ "a photo with barrel distortion curving the sides outward",
+ "an image showing barrel distortion for a fisheye lens effect",
+ "a photograph with warped edges due to barrel distortion",
+ "a picture with barrel distortion altering the straight lines",
+ "an image displaying barrel distortion creating a rounded appearance",
+ "a photo with barrel distortion enhancing the central focus",
+ ],
+ "Pincushion": [
+ "an image with pincushion distortion bending the edges inward",
+ "a photograph exhibiting pincushion distortion creating a concave effect",
+ "a picture with pincushion distortion warping the image edges inward",
+ "an image featuring pincushion distortion causing peripheral compression",
+ "a photo with pincushion distortion curving the sides inward",
+ "an image showing pincushion distortion for a telephoto lens effect",
+ "a photograph with warped edges due to pincushion distortion",
+ "a picture with pincushion distortion altering the straight lines inward",
+ "an image displaying pincushion distortion creating a pinched appearance",
+ "a photo with pincushion distortion enhancing the central focus inward",
+ ],
+ "Elastic": [
+ "an image with elastic deformation creating fluid distortions",
+ "a photograph exhibiting elastic transformations warping the structure",
+ "a picture with elastic effects bending and stretching elements",
+ "an image featuring elastic distortions for a dynamic appearance",
+ "a photo with elastic transformations altering the image geometry",
+ "an image showing elastic deformation for a fluid, wavy effect",
+ "a photograph with elastic warping adding motion-like distortions",
+ "a picture with elastic effects creating flexible and dynamic shapes",
+ "an image displaying elastic transformations enhancing creative distortion",
+ "a photo with elastic deformation modifying the original image structure",
+ ],
+ # Spatial Effect
+ "Rain": [
+ "an image with realistic rain effects adding dynamic streaks",
+ "a photograph exhibiting rain overlays creating a wet atmosphere",
+ "a picture with rain effects enhancing the scene with falling droplets",
+ "an image featuring rain streaks adding motion and mood",
+ "a photo with simulated rain creating a rainy day ambiance",
+ "an image showing rain effects with dynamic water droplets",
+ "a photograph with rain overlays adding a sense of movement",
+ "a picture with rain effects enhancing the visual texture",
+ "an image displaying rain streaks for a dramatic weather effect",
+ "a photo with realistic rain adding depth and atmosphere",
+ ],
+ "Frost": [
+ "an image with frost overlays creating icy textures",
+ "a photograph exhibiting frost effects adding a chilly ambiance",
+ "a picture with frost patterns enhancing the scene with icy details",
+ "an image featuring frost overlays creating a frozen appearance",
+ "a photo with simulated frost adding a wintry atmosphere",
+ "an image showing frost effects with delicate ice patterns",
+ "a photograph with frost overlays adding a sense of coldness",
+ "a picture with frost effects enhancing the visual texture with ice",
+ "an image displaying frost patterns for a frosty weather effect",
+ "a photo with realistic frost adding depth and a chilly mood",
+ ],
+ }
+ if image_type in style_list:
+ return [random.choice(image_prompts["style_source"]), random.choice(image_prompts["style_target"])]
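+    # NOTE: "clothing" and "fullbody" prompt lists are not defined in image_prompts
+    # above; this branch assumes they are supplied elsewhere (e.g., for try-on data).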
+ elif image_type == 'clothing':
+ return [random.choice(image_prompts["clothing"]), random.choice(image_prompts["fullbody"])]
+ else:
+ return [random.choice(image_prompts[image_type])]
+
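+# Illustrative usage (each call samples randomly from the lists above):
+#   get_image_prompt("canny")        -> [one canny edge-map description]
+#   get_image_prompt("InstantStyle") -> [style_source prompt, style_target prompt]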
+
+def get_layout_instruction(cols, rows):
+ layout_instruction = [
+ f"A grid layout with {rows} rows and {cols} columns, displaying {cols*rows} images arranged side by side.",
+ f"{cols*rows} images are organized into a grid of {rows} rows and {cols} columns, evenly spaced.",
+ f"A {rows}x{cols} grid containing {cols*rows} images, aligned in a clean and structured layout.",
+ f"{cols*rows} images are placed in a grid format with {rows} horizontal rows and {cols} vertical columns.",
+ f"A visual grid composed of {rows} rows and {cols} columns, showcasing {cols*rows} images in a balanced arrangement.",
+ f"{cols*rows} images form a structured grid, with {rows} rows and {cols} columns, neatly aligned.",
+ f"A {rows}x{cols} grid layout featuring {cols*rows} images, arranged side by side in a precise pattern.",
+ f"{cols*rows} images are displayed in a grid of {rows} rows and {cols} columns, creating a uniform visual structure.",
+ f"A grid with {rows} rows and {cols} columns, containing {cols*rows} images arranged in a symmetrical layout.",
+ f"{cols*rows} images are organized into a {rows}x{cols} grid, forming a cohesive and orderly display.",
+ ]
+ return random.choice(layout_instruction)
+
+
+def get_task_instruction(condition_prompt, target_prompt):
+ task_instruction = [
+ f"Each row outlines a logical process, starting from {condition_prompt}, to achieve {target_prompt}.",
+ f"In each row, a method is described to use {condition_prompt} for generating {target_prompt}.",
+ f"Each row presents a task that leverages {condition_prompt} to produce {target_prompt}.",
+ f"Every row demonstrates how to transform {condition_prompt} into {target_prompt} through a logical approach.",
+ f"Each row details a strategy to derive {target_prompt} based on the provided {condition_prompt}.",
+ f"In each row, a technique is explained to convert {condition_prompt} into {target_prompt}.",
+ f"Each row illustrates a pathway from {condition_prompt} to {target_prompt} using a clear logical task.",
+ f"Every row provides a step-by-step guide to evolve {condition_prompt} into {target_prompt}.",
+ f"Each row describes a process that begins with {condition_prompt} and results in {target_prompt}.",
+ f"In each row, a logical task is demonstrated to achieve {target_prompt} based on {condition_prompt}.",
+ ]
+ return random.choice(task_instruction)
+
+
+def get_content_instruction():
+ content_instruction = [
+ "The content of the last image in the final row is: ",
+ "The last image of the last row depicts: ",
+ "In the final row, the last image shows: ",
+ "The last image in the bottom row illustrates: ",
+ "The content of the bottom-right image is: ",
+ "The final image in the last row portrays: ",
+ "The last image of the final row displays: ",
+ "In the last row, the final image captures: ",
+ "The bottom-right corner image presents: ",
+ "The content of the last image in the concluding row is: ",
+ ]
+ return random.choice(content_instruction)
+
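+# Illustrative composition of a full prompt, mirroring the
+# [layout_text, task_text, content_text] ordering used by the demo app
+# (the condition/target phrases are made-up examples):
+#   layout = get_layout_instruction(3, 2)
+#   task = get_task_instruction("a depth map", "a high-quality image")
+#   content = get_content_instruction() + "a red car parked by the sea."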
+
+graph200k_task_dicts = [
+ {
+ "task_name": "conditional generation",
+ "sample_weight": 1,
+ "image_list": [
+ ["canny", "target"],
+ ["depth", "target"],
+ ["hed", "target"],
+ ["normal", "target"],
+ ["mlsd", "target"],
+ ["openpose", "target"],
+ ["sam2_mask", "target"],
+ ["uniformer", "target"],
+ ["mask", "target"],
+ ["foreground", "target"],
+ ["background", "target"],
+ ],
+ },
+ {
+ "task_name": "conditional generation with reference",
+ "sample_weight": 1,
+ "image_list": [
+ ["reference", "canny", "target"],
+ ["reference", "depth", "target"],
+ ["reference", "hed", "target"],
+ ["reference", "normal", "target"],
+ ["reference", "mlsd", "target"],
+ ["reference", "openpose", "target"],
+ ["reference", "sam2_mask", "target"],
+ ["reference", "uniformer", "target"],
+ ["reference", "mask", "target"],
+ ["reference", "background", "target"],
+ ],
+ },
+ {
+ "task_name": "conditional generation with style",
+ "sample_weight": 1,
+ "image_list": [
+ # instant style
+ ["canny", "InstantStyle"],
+ ["depth", "InstantStyle"],
+ ["hed", "InstantStyle"],
+ ["normal", "InstantStyle"],
+ ["mlsd", "InstantStyle"],
+ ["openpose", "InstantStyle"],
+ ["sam2_mask", "InstantStyle"],
+ ["uniformer", "InstantStyle"],
+ ["mask", "InstantStyle"],
+ # redux style
+ ["canny", "ReduxStyle"],
+ ["depth", "ReduxStyle"],
+ ["hed", "ReduxStyle"],
+ ["normal", "ReduxStyle"],
+ ["mlsd", "ReduxStyle"],
+ ["openpose", "ReduxStyle"],
+ ["sam2_mask", "ReduxStyle"],
+ ["uniformer", "ReduxStyle"],
+ ["mask", "ReduxStyle"],
+ ],
+ },
+ {
+ "task_name": "image generation with reference",
+ "sample_weight": 1,
+ "image_list": [
+ ["reference", "target"],
+ ],
+ },
+ {
+ "task_name": "subject extraction",
+ "sample_weight": 1,
+ "image_list": [
+ ["target", "reference"],
+ ],
+ },
+ {
+ "task_name": "style transfer",
+ "sample_weight": 1,
+ "image_list": [
+ ["target", "InstantStyle"],
+ ["target", "ReduxStyle"],
+ ["reference", "InstantStyle"],
+ ],
+ },
+ {
+ "task_name": "style transfer with condition",
+ "sample_weight": 1,
+ "image_list": [
+ ["reference", "canny", "InstantStyle"],
+ ["reference", "depth", "InstantStyle"],
+ ["reference", "hed", "InstantStyle"],
+ ["reference", "normal", "InstantStyle"],
+ ["reference", "mlsd", "InstantStyle"],
+ ["reference", "openpose", "InstantStyle"],
+ ["reference", "sam2_mask", "InstantStyle"],
+ ["reference", "uniformer", "InstantStyle"],
+ ["reference", "mask", "InstantStyle"],
+ ],
+ },
+ {
+ "task_name": "image editing",
+ "sample_weight": 1,
+ "image_list": [
+ ["DepthEdit", "target"],
+ ["FillEdit", "target"],
+ ],
+ },
+ {
+ "task_name": "image editing with reference",
+ "sample_weight": 1,
+ "image_list": [
+ ["reference", "DepthEdit", "target"],
+ ["reference", "FillEdit", "target"],
+ ],
+ },
+ {
+ "task_name": "dense prediction",
+ "sample_weight": 1,
+ "image_list": [
+ ["target", "canny"],
+ ["target", "depth"],
+ ["target", "hed"],
+ ["target", "normal"],
+ ["target", "mlsd"],
+ ["target", "openpose"],
+ ["target", "sam2_mask"],
+ ["target", "uniformer"],
+ ],
+ },
+ {
+ "task_name": "restoration",
+ "sample_weight": 1,
+ "image_list": [
+ # blur related
+ ["blur", "target"],
+ ["compression", "target"],
+ ["SRx2", "target"],
+ ["SRx4", "target"],
+ ["pixelate", "target"],
+ ["Defocus", "target"],
+ ["GaussianBlur", "target"],
+
+ # sharpen related
+ ["oversharpen", "target"],
+
+ # noise related
+ ["GaussianNoise", "target"],
+ ["PoissonNoise", "target"],
+ ["SPNoise", "target"],
+
+ # mosaic
+ ["mosaic", "target"],
+
+ # contrast related
+ ["contrast_strengthen", "target"],
+ ["contrast_weaken", "target"],
+
+ # quantization related
+ ["quantization", "target"],
+ ["JPEG", "target"],
+
+ # light related
+ ["brighten", "target"],
+ ["darken", "target"],
+ ["LowLight", "target"],
+
+ # color related
+ ["saturate_strengthen", "target"],
+ ["saturate_weaken", "target"],
+ ["gray", "target"],
+ ["ColorDistortion", "target"],
+
+ # infilling
+ ["Inpainting", "target"],
+
+ # rotation related
+ ["rotate90", "target"],
+ ["rotate180", "target"],
+ ["rotate270", "target"],
+
+ # distortion related
+ ["Barrel", "target"],
+ ["Pincushion", "target"],
+ ["Elastic", "target"],
+
+ # special effects
+ ["Rain", "target"],
+ ["Frost", "target"]
+ ],
+ },
+ {
+ "task_name": "restoration with reference",
+ "sample_weight": 1,
+ "image_list": [
+ # blur related
+ ["reference", "blur", "target"],
+ ["reference", "compression", "target"],
+ ["reference", "SRx2", "target"],
+ ["reference", "SRx4", "target"],
+ ["reference", "pixelate", "target"],
+ ["reference", "Defocus", "target"],
+ ["reference", "GaussianBlur", "target"], # new
+ # sharpen related
+ ["reference", "oversharpen", "target"],
+ # noise related
+ ["reference", "GaussianNoise", "target"],
+ ["reference", "PoissonNoise", "target"],
+ ["reference", "SPNoise", "target"],
+ # mosaic
+ ["reference", "mosaic", "target"],
+ # contrast related
+ ["reference", "contrast_strengthen", "target"],
+ ["reference", "contrast_weaken", "target"],
+ # quantization related
+ ["reference", "quantization", "target"],
+ ["reference", "JPEG", "target"],
+ # light related
+ ["reference", "brighten", "target"],
+ ["reference", "darken", "target"],
+ ["reference", "LowLight", "target"], # new
+ # color related
+ ["reference", "saturate_strengthen", "target"],
+ ["reference", "saturate_weaken", "target"],
+ ["reference", "gray", "target"],
+ ["reference", "ColorDistortion", "target"],
+ # infilling
+ ["reference", "Inpainting", "target"],
+ # rotation related
+ ["reference", "rotate90", "target"],
+ ["reference", "rotate180", "target"],
+ ["reference", "rotate270", "target"],
+ # distortion related
+ ["reference", "Barrel", "target"],
+ ["reference", "Pincushion", "target"],
+ ["reference", "Elastic", "target"],
+ # special effects
+ ["reference", "Rain", "target"],
+ ["reference", "Frost", "target"]
+ ],
+ }
+]
+
+
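+# Hypothetical sketch of how these task dicts could be consumed: pick a task in
+# proportion to its sample_weight, then pick one of its image sequences.
+def sample_task(task_dicts, rng=random):
+    weights = [t["sample_weight"] for t in task_dicts]
+    task = rng.choices(task_dicts, weights=weights, k=1)[0]
+    return task["task_name"], rng.choice(task["image_list"])
+
+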
+test_task_dicts = [
+ {
+ "task_name": "conditional generation",
+ "sample_weight": 1,
+ "image_list": [
+ ["canny", "target"],
+ ["depth", "target"],
+ ["hed", "target"],
+ ["normal", "target"],
+ ["mlsd", "target"],
+ ["openpose", "target"],
+ ["sam2_mask", "target"],
+ ["uniformer", "target"],
+ ["mask", "target"],
+ ["foreground", "target"],
+ ["background", "target"],
+ ],
+ },
+ {
+ "task_name": "image generation with reference",
+ "sample_weight": 1,
+ "image_list": [
+ ["reference", "target"],
+ ],
+ },
+ {
+ "task_name": "conditional generation with reference",
+ "sample_weight": 1,
+ "image_list": [
+ ["reference", "depth", "target"],
+ ["reference", "openpose", "target"],
+ ],
+ },
+ {
+ "task_name": "subject extraction",
+ "sample_weight": 0.2,
+ "image_list": [
+ ["target", "reference"],
+ ],
+ },
+ {
+ "task_name": "dense prediction",
+ "sample_weight": 1,
+ "image_list": [
+ ["target", "depth"],
+ ["target", "openpose"],
+ ],
+ },
+ {
+ "task_name": "restoration",
+ "sample_weight": 1,
+ "image_list": [
+ # blur related
+ ["GaussianBlur", "target"],
+
+ # infilling
+ ["Inpainting", "target"],
+
+ # rotation related
+ ["rotate90", "target"],
+
+ # distortion related
+ ["Elastic", "target"],
+ ],
+ },
+ {
+ "task_name": "restoration with reference",
+ "sample_weight": 1,
+ "image_list": [
+ # infilling
+ ["reference", "Inpainting", "target"],
+ ],
+ },
+ {
+ "task_name": "image editing with reference",
+ "sample_weight": 1,
+ "image_list": [
+ ["reference", "DepthEdit", "target"],
+ ["reference", "FillEdit", "target"],
+ ],
+ },
+ {
+ "task_name": "style transfer",
+ "sample_weight": 1,
+ "image_list": [
+ ["target", "InstantStyle"],
+ ["target", "ReduxStyle"],
+ ["reference", "InstantStyle"],
+ ],
+ },
+ {
+ "task_name": "style transfer with condition",
+ "sample_weight": 1,
+ "image_list": [
+ ["reference", "canny", "InstantStyle"],
+ ["reference", "depth", "InstantStyle"],
+ ["reference", "hed", "InstantStyle"],
+ ["reference", "normal", "InstantStyle"],
+ ["reference", "mlsd", "InstantStyle"],
+ ["reference", "openpose", "InstantStyle"],
+ ["reference", "sam2_mask", "InstantStyle"],
+ ["reference", "uniformer", "InstantStyle"],
+ ["reference", "mask", "InstantStyle"],
+ ],
+ },
+ {
+ "task_name": "subject extraction",
+ "sample_weight": 1,
+ "image_list": [
+ ["target", "reference"],
+ ],
+ },
+]
\ No newline at end of file
diff --git a/degradation_toolkit/__init__.py b/degradation_toolkit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/degradation_toolkit/add_degradation_various.py b/degradation_toolkit/add_degradation_various.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e16fb38f02d84358e6f74903b837da75b98d31e
--- /dev/null
+++ b/degradation_toolkit/add_degradation_various.py
@@ -0,0 +1,401 @@
+import os
+import numpy as np
+import random
+import cv2
+import math
+from scipy import special
+from skimage import restoration
+
+import torch
+from torch.nn import functional as F
+from torchvision.utils import make_grid
+
+
+def uint2single(img):
+ return np.float32(img/255.)
+
+
+def single2uint(img):
+ return np.uint8((img.clip(0, 1)*255.).round())
+
+
+def img2tensor(imgs, bgr2rgb=True, float32=True):
+ """Numpy array to tensor.
+ Args:
+ imgs (list[ndarray] | ndarray): Input images.
+ bgr2rgb (bool): Whether to change bgr to rgb.
+ float32 (bool): Whether to change to float32.
+ Returns:
+ list[tensor] | tensor: Tensor images. If returned results only have
+ one element, just return tensor.
+ """
+
+ def _totensor(img, bgr2rgb, float32):
+ if img.shape[2] == 3 and bgr2rgb:
+ if img.dtype == 'float64':
+ img = img.astype('float32')
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ img = torch.from_numpy(img.transpose(2, 0, 1))
+ if float32:
+ img = img.float()
+ return img
+
+ if isinstance(imgs, list):
+ return [_totensor(img, bgr2rgb, float32) for img in imgs]
+ else:
+ return _totensor(imgs, bgr2rgb, float32)
+
+
+def tensor2img(tensor, rgb2bgr=True, out_type=np.uint8, min_max=(0, 1)):
+ """Convert torch Tensors into image numpy arrays.
+ After clamping to [min, max], values will be normalized to [0, 1].
+ Args:
+ tensor (Tensor or list[Tensor]): Accept shapes:
+ 1) 4D mini-batch Tensor of shape (B x 3/1 x H x W);
+ 2) 3D Tensor of shape (3/1 x H x W);
+ 3) 2D Tensor of shape (H x W).
+ Tensor channel should be in RGB order.
+ rgb2bgr (bool): Whether to change rgb to bgr.
+ out_type (numpy type): output types. If ``np.uint8``, transform outputs
+ to uint8 type with range [0, 255]; otherwise, float type with
+ range [0, 1]. Default: ``np.uint8``.
+ min_max (tuple[int]): min and max values for clamp.
+ Returns:
+ (Tensor or list): 3D ndarray of shape (H x W x C) OR 2D ndarray of
+ shape (H x W). The channel order is BGR.
+ """
+ if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
+ raise TypeError(f'tensor or list of tensors expected, got {type(tensor)}')
+
+ if torch.is_tensor(tensor):
+ tensor = [tensor]
+ result = []
+ for _tensor in tensor:
+ _tensor = _tensor.squeeze(0).float().detach().cpu().clamp_(*min_max)
+ _tensor = (_tensor - min_max[0]) / (min_max[1] - min_max[0])
+
+ n_dim = _tensor.dim()
+ if n_dim == 4:
+ img_np = make_grid(_tensor, nrow=int(math.sqrt(_tensor.size(0))), normalize=False).numpy()
+ img_np = img_np.transpose(1, 2, 0)
+ if rgb2bgr:
+ img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
+ elif n_dim == 3:
+ img_np = _tensor.numpy()
+ img_np = img_np.transpose(1, 2, 0)
+ if img_np.shape[2] == 1: # gray image
+ img_np = np.squeeze(img_np, axis=2)
+ else:
+ if rgb2bgr:
+ img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
+ elif n_dim == 2:
+ img_np = _tensor.numpy()
+ else:
+ raise TypeError(f'Only support 4D, 3D or 2D tensor. But received with dimension: {n_dim}')
+ if out_type == np.uint8:
+ # Unlike MATLAB, numpy.unit8() WILL NOT round by default.
+ img_np = (img_np * 255.0).round()
+ img_np = img_np.astype(out_type)
+ result.append(img_np)
+ if len(result) == 1:
+ result = result[0]
+ return result
+
+
+def get_noise(img, value=10):
+    # Sparse uniform noise: only samples above the (256 - value * 0.01) threshold
+    # survive; a small vertical kernel then emphasizes them as rain seeds.
+    noise = np.random.uniform(0, 256, img.shape[0:2])
+    v = value * 0.01
+    noise[np.where(noise < (256 - v))] = 0
+
+ k = np.array([[0, 0.1, 0],
+ [0.1, 8, 0.1],
+ [0, 0.1, 0]])
+
+ noise = cv2.filter2D(noise, -1, k)
+
+ return noise
+
+
+def rain_blur(noise, length=10, angle=0, w=1):
+
+ trans = cv2.getRotationMatrix2D((length / 2, length / 2), angle - 45, 1 - length / 100.0)
+ dig = np.diag(np.ones(length))
+ k = cv2.warpAffine(dig, trans, (length, length))
+ k = cv2.GaussianBlur(k, (w, w), 0)
+
+ blurred = cv2.filter2D(noise, -1, k)
+
+ cv2.normalize(blurred, blurred, 0, 255, cv2.NORM_MINMAX)
+ blurred = np.array(blurred, dtype=np.uint8)
+
+ rain = np.expand_dims(blurred, 2)
+ blurred = np.repeat(rain, 3, 2)
+
+ return blurred
+
+
+def add_rain(img, value):
+    # Scale to [0, 255] if the input is in [0, 1].
+    if np.max(img) <= 1:
+        img = img * 255
+
+    # Crop so both spatial dimensions are multiples of 4.
+    h, w, c = img.shape
+    h = h - (h % 4)
+    w = w - (w % 4)
+    img = img[0:h, 0:w, :]
+
+    # Random streak parameters: kernel width, streak length, and angle.
+    kernel_w = int(np.random.choice([3, 5, 7, 9, 11]))
+    length = np.random.randint(30, 41)
+    angle = np.random.randint(-45, 45)
+
+    noise = get_noise(img, value=value)
+    rain = rain_blur(noise, length=length, angle=angle, w=kernel_w)
+
+    img = img.astype('float32') + rain
+    np.clip(img, 0, 255, out=img)
+    img = img / 255.0
+    return img
+
+
+def add_rain_range(img, value_min, value_max):
+    # Identical to add_rain, with the rain density sampled per call.
+    value = np.random.randint(value_min, value_max)
+    return add_rain(img, value)
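+
+
+# Usage sketch (illustrative values):
+#   rainy = add_rain(img, value=500)        # img: H x W x 3 array, range [0, 1]
+#   rainy = add_rain_range(img, 400, 600)   # density sampled from [400, 600)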
+
+
+def add_Poisson_noise(img, level=2):
+    # input range: [0, 1]
+    vals = 10 ** level
+ img = np.random.poisson(img * vals).astype(np.float32) / vals
+ img = np.clip(img, 0.0, 1.0)
+ return img
+
+
+def add_Gaussian_noise(img, level=20):
+    # input range: [0, 1]
+ noise_level = level / 255.0
+ noise_map = np.random.normal(loc=0.0, scale=1.0, size=img.shape)*noise_level
+ img += noise_map
+ img = np.clip(img, 0.0, 1.0)
+ return img
+
+
+def add_Gaussian_noise_range(img, min_level=10, max_level=50):
+    # input range: [0, 1]
+ level = random.uniform(min_level, max_level)
+ noise_level = level / 255.0
+ noise_map = np.random.normal(loc=0.0, scale=1.0, size=img.shape)*noise_level
+ img += noise_map
+ img = np.clip(img, 0.0, 1.0)
+ return img
+
+
+def add_sp_noise(img, snr=0.95, salt_pro=0.5):
+    # Salt-and-pepper noise: each pixel is kept with probability snr, otherwise
+    # it is set to salt (1) or pepper (0). Input range: [0, 1].
+    output = np.copy(img)
+ for i in range(img.shape[0]):
+ for j in range(img.shape[1]):
+ rdn = random.random()
+ if rdn < snr:
+ output[i][j] = img[i][j]
+ else:
+ rdn = random.random()
+ if rdn < salt_pro:
+ output[i][j] = 1
+ else:
+ output[i][j] = 0
+
+ return output
+
+
+def add_JPEG_noise(img, level):
+
+ quality_factor = level
+ img = single2uint(img)
+ _, encimg = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
+ img = cv2.imdecode(encimg, 1)
+ img = uint2single(img)
+
+ return img
+
+
+def add_JPEG_noise_range(img, level_min, level_max):
+
+ quality_factor = random.randint(level_min, level_max)
+ img = single2uint(img)
+ _, encimg = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
+ img = cv2.imdecode(encimg, 1)
+ img = uint2single(img)
+
+ return img
+
+
+def circular_lowpass_kernel(cutoff, kernel_size, pad_to=0):
+ """2D sinc filter, ref: https://dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter
+
+ Args:
+ cutoff (float): cutoff frequency in radians (pi is max)
+ kernel_size (int): horizontal and vertical size, must be odd.
+ pad_to (int): pad kernel size to desired size, must be odd or zero.
+ """
+ assert kernel_size % 2 == 1, 'Kernel size must be an odd number.'
+ kernel = np.fromfunction(
+ lambda x, y: cutoff * special.j1(cutoff * np.sqrt(
+ (x - (kernel_size - 1) / 2) ** 2 + (y - (kernel_size - 1) / 2) ** 2)) / ((2 * np.pi * np.sqrt(
+ (x - (kernel_size - 1) / 2) ** 2 + (y - (kernel_size - 1) / 2) ** 2)) + 1e-9), [kernel_size, kernel_size])
+ kernel[(kernel_size - 1) // 2, (kernel_size - 1) // 2] = cutoff ** 2 / (4 * np.pi)
+ kernel = kernel / np.sum(kernel)
+ if pad_to > kernel_size:
+ pad_size = (pad_to - kernel_size) // 2
+ kernel = np.pad(kernel, ((pad_size, pad_size), (pad_size, pad_size)))
+ return kernel
+
+
+def filter2D(img, kernel):
+ """PyTorch version of cv2.filter2D
+ Args:
+ img (Tensor): (b, c, h, w)
+ kernel (Tensor): (b, k, k)
+ """
+ k = kernel.size(-1)
+ b, c, h, w = img.size()
+ if k % 2 == 1:
+ img = F.pad(img, (k // 2, k // 2, k // 2, k // 2), mode='reflect')
+ else:
+ raise ValueError('Wrong kernel size')
+
+ ph, pw = img.size()[-2:]
+
+ if kernel.size(0) == 1:
+ # apply the same kernel to all batch images
+ img = img.view(b * c, 1, ph, pw)
+ kernel = kernel.view(1, 1, k, k)
+ return F.conv2d(img, kernel, padding=0).view(b, c, h, w)
+ else:
+ img = img.view(1, b * c, ph, pw)
+ kernel = kernel.view(b, 1, k, k).repeat(1, c, 1, 1).view(b * c, 1, k, k)
+ return F.conv2d(img, kernel, groups=b * c).view(b, c, h, w)
+
+
+def sinc(img, kernel_size, omega_c):
+    sinc_kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=21)
+    sinc_kernel = torch.FloatTensor(sinc_kernel)
+    img = filter2D(img, sinc_kernel)
+    return img
+
+
+def add_ringing(img):
+ # input: [0, 1]
+ img = img2tensor([img])[0].unsqueeze(0)
+    ks = 15
+    omega_c = 1.2  # cutoff frequency of the sinc kernel
+ img = sinc(img, ks, omega_c)
+ img = torch.clamp((img * 255.0).round(), 0, 255) / 255.
+ img = tensor2img(img, min_max=(0, 1))
+ img = img/255.0
+ return img
+
+
+def low_light(img, lum_scale):
+ img = img*lum_scale
+ return img
+
+
+def low_light_range(img):
+ lum_scale = random.uniform(0.1, 0.5)
+ img = img*lum_scale
+ return img
+
+
+def iso_GaussianBlur(img, window, sigma):
+ img = cv2.GaussianBlur(img.copy(), (window, window), sigma)
+ return img
+
+
+def iso_GaussianBlur_range(img, window, min_sigma=2, max_sigma=4):
+ sigma = random.uniform(min_sigma, max_sigma)
+ img = cv2.GaussianBlur(img.copy(), (window, window), sigma)
+ return img
+
+
+def add_resize(img):
+ ori_H, ori_W = img.shape[0], img.shape[1]
+ rnum = np.random.rand()
+ if rnum > 0.8: # up
+ sf1 = random.uniform(1, 2)
+ elif rnum < 0.7: # down
+ sf1 = random.uniform(0.2, 1)
+ else:
+ sf1 = 1.0
+ img = cv2.resize(img, (int(sf1*img.shape[1]), int(sf1*img.shape[0])), interpolation=random.choice([1, 2, 3]))
+ img = cv2.resize(img, (int(ori_W), int(ori_H)), interpolation=random.choice([1, 2, 3]))
+
+ img = np.clip(img, 0.0, 1.0)
+
+ return img
+
+
+def r_l(img):
+    # Richardson-Lucy deconvolution with a flat 5x5 PSF, single iteration.
+    img = img2tensor([img], bgr2rgb=False)[0].unsqueeze(0)
+ psf = np.ones((1, 1, 5, 5))
+ psf = psf / psf.sum()
+ img = img.numpy()
+ img = np.pad(img, ((0, 0), (0, 0), (7, 7), (7, 7)), 'linear_ramp')
+ img = restoration.richardson_lucy(img, psf, 1)
+ img = img[:, :, 7:-7, 7:-7]
+ img = torch.from_numpy(img)
+ img = img.squeeze(0).numpy().transpose(1, 2, 0)
+ return img
+
+
+def inpainting(img, l_num, l_thick):
+    # Draw l_num random strokes of thickness l_thick and paint them uniformly
+    # white or black to simulate regions that need infilling.
+    ori_h, ori_w = img.shape[0], img.shape[1]
+    mask = np.zeros((ori_h, ori_w, 3), np.uint8)
+    col = random.choice(['white', 'black'])
+    for _ in range(l_num):
+        x1, y1 = random.randint(0, ori_w), random.randint(0, ori_h)
+        x2, y2 = random.randint(0, ori_w), random.randint(0, ori_h)
+        pts = np.array([[x1, y1], [x2, y2]], np.int32)
+        pts = pts.reshape((-1, 1, 2))
+        mask = cv2.polylines(mask, [pts], 0, (1, 1, 1), l_thick)
+
+ if col == 'white':
+ img = np.clip(img + mask, 0, 1)
+ else:
+ img = np.clip(img - mask, 0, 1)
+
+ return img
diff --git a/degradation_toolkit/frost/frost1.png b/degradation_toolkit/frost/frost1.png
new file mode 100644
index 0000000000000000000000000000000000000000..c9edf9b6e1a2744d15af615af641f2aa48aa89c2
--- /dev/null
+++ b/degradation_toolkit/frost/frost1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff9f907860bd7a835d459e32f9d588062b7f61ee267343cc7222b56753a14755
+size 1199930
diff --git a/degradation_toolkit/frost/frost2.png b/degradation_toolkit/frost/frost2.png
new file mode 100644
index 0000000000000000000000000000000000000000..473ea3de89283621191b98205f7fb6b0b10ba72b
--- /dev/null
+++ b/degradation_toolkit/frost/frost2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe211a89b336999c207a852ce05818d4545d0b57c5beadd824b4cc9d9a9b6137
+size 299002
diff --git a/degradation_toolkit/frost/frost3.png b/degradation_toolkit/frost/frost3.png
new file mode 100644
index 0000000000000000000000000000000000000000..c898fc267e33dd03d40bbf7d72d21e78485918d0
--- /dev/null
+++ b/degradation_toolkit/frost/frost3.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d0d50b4a9bb213f38b024ef7768731bb83cc08d2f26b5766bbc167cdfa0e504
+size 299002
diff --git a/degradation_toolkit/frost/frost4.jpg b/degradation_toolkit/frost/frost4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..154cf6e0c6b3dae3cd3fbad8388658df047786b8
--- /dev/null
+++ b/degradation_toolkit/frost/frost4.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f8b91ca1a9fa7167b09e773da53f5ae60d0a1fd88f02a783f6e328a72887f6e
+size 36734
diff --git a/degradation_toolkit/frost/frost5.jpg b/degradation_toolkit/frost/frost5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..350091fd0e9cd3f048e543c0aaf9aeb57f1a4beb
--- /dev/null
+++ b/degradation_toolkit/frost/frost5.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fc6a19df4a429ba68abdcc8f8a4278d4c9f81c9ccafd2c92ab0c8cf8992ebd2
+size 155150
diff --git a/degradation_toolkit/frost/frost6.jpg b/degradation_toolkit/frost/frost6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8df587c3f321d7df886ab40ab4c577de8b8aac52
--- /dev/null
+++ b/degradation_toolkit/frost/frost6.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f92b2f48408748085b68dd81d816ef239f42cef3029c25d041fcb6760fb4f25
+size 90185
diff --git a/degradation_toolkit/image_operators.py b/degradation_toolkit/image_operators.py
new file mode 100644
index 0000000000000000000000000000000000000000..878b2c4ca5c3e399c8475f88d5c152486642bb56
--- /dev/null
+++ b/degradation_toolkit/image_operators.py
@@ -0,0 +1,420 @@
+import os
+import cv2
+import numpy as np
+from skimage.filters import gaussian
+from scipy.ndimage import map_coordinates
+from PIL import Image
+
+
+def single2uint(img):
+ return np.uint8((img.clip(0, 1)*255.).round())
+
+
+def uint2single(img):
+ return np.float32(img/255.)
+
+
+def Laplacian_edge_detector(img):
+ # input: [0, 1]
+ # return: [0, 1] (H, W, 3)
+ img = np.clip(img*255, 0, 255).astype(np.uint8) # (H, W, 3)
+ img = cv2.GaussianBlur(img, (3, 3), 0)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ img = cv2.Laplacian(img, cv2.CV_16S) # (H, W)
+ img = cv2.convertScaleAbs(img)
+ img = img.astype(np.float32) / 255.
+ img = np.expand_dims(img, 2).repeat(3, axis=2) # (H, W, 3)
+ return img
+
+
+def Laplacian_edge_detector_uint8(img):
+ # input: [0, 255]
+ # return: [0, 255] (H, W, 3)
+ img = cv2.GaussianBlur(img, (3, 3), 0)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ img = cv2.Laplacian(img, cv2.CV_16S) # (H, W)
+ img = cv2.convertScaleAbs(img)
+ img = np.expand_dims(img, 2).repeat(3, axis=2) # (H, W, 3)
+ return img
+
+
+def Canny_edge_detector(img):
+ # input: [0, 1]
+ # return: [0, 1] (H, W, 3)
+ img = np.clip(img*255, 0, 255).astype(np.uint8) # (H, W, 3)
+ img = cv2.GaussianBlur(img, (3, 3), 0)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ img = cv2.Canny(img, 50, 200) # (H, W)
+ img = cv2.convertScaleAbs(img)
+ img = img.astype(np.float32) / 255.
+ img = np.expand_dims(img, 2).repeat(3, axis=2) # (H, W, 3)
+ return img
+
+
+def Canny_edge_detector_uint8(img):
+ # input: [0, 255]
+ # return: [0, 255] (H, W, 3)
+ img = cv2.GaussianBlur(img, (3, 3), 0)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ img = cv2.Canny(img, 50, 200) # (H, W)
+ img = cv2.convertScaleAbs(img)
+ img = np.expand_dims(img, 2).repeat(3, axis=2) # (H, W, 3)
+ return img
+
+
+def Sobel_edge_detector(img):
+ # input: [0, 1]
+ # return: [0, 1] (H, W, 3)
+ img = np.clip(img*255, 0, 255).astype(np.uint8) # (H, W, 3)
+ img = cv2.GaussianBlur(img, (3, 3), 0)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ img = cv2.Sobel(img, cv2.CV_16S, 1, 1) # (H, W)
+ img = cv2.convertScaleAbs(img)
+ img = img.astype(np.float32) / 255.
+ img = np.expand_dims(img, 2).repeat(3, axis=2) # (H, W, 3)
+ return img
+
+
+def erosion(img, kernel_size=5):
+ kernel = np.ones((kernel_size, kernel_size), np.uint8)
+ img = cv2.erode(img, kernel, iterations=1)
+ return img
+
+
+def dilatation(img, kernel_size=5):
+ kernel = np.ones((kernel_size, kernel_size), np.uint8)
+ img = cv2.dilate(img, kernel, iterations=1)
+ return img
+
+
+def opening(img):
+ return dilatation(erosion(img))
+
+
+def closing(img):
+ return erosion(dilatation(img))
+
+
+def morphological_gradient(img):
+ return dilatation(img) - erosion(img)
+
+
+def top_hat(img):
+ return img - opening(img)
+
+
+def black_hat(img):
+ return closing(img) - img
+
+
+def adjust_contrast(image, clip_limit=2.0, tile_grid_size=(8, 8)):
+
+ image = single2uint(image)
+ lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
+
+ l, a, b = cv2.split(lab)
+
+ clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
+ l_eq = clahe.apply(l)
+
+ lab_eq = cv2.merge((l_eq, a, b))
+ result = cv2.cvtColor(lab_eq, cv2.COLOR_LAB2BGR)
+
+ result = uint2single(result)
+ return result
+
+
+def embossing(img):
+ kernel = np.array([[0, -1, -1],
+ [1, 0, -1],
+ [1, 1, 0]])
+ return cv2.filter2D(img, -1, kernel)
+
+
+def hough_transform_line_detection(img):
+ img = single2uint(img)
+ dst = cv2.Canny(img, 50, 200, apertureSize=3)
+    # HoughLines returns (rho, theta) pairs, which is what the loop below expects
+    # (HoughLinesP would return segment endpoints instead).
+    lines = cv2.HoughLines(dst, 1, np.pi / 180, 230)
+ if lines is not None:
+ for i in range(0, len(lines)):
+ rho = lines[i][0][0]
+ theta = lines[i][0][1]
+ a = np.cos(theta)
+ b = np.sin(theta)
+
+ x0 = a * rho
+ y0 = b * rho
+ pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
+
+ pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
+ cv2.line(img, pt1, pt2, (0, 0, 255), 3, cv2.LINE_AA)
+
+ return uint2single(img)
+
+
+def hough_circle_detection(img):
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 100, param1=100, param2=30, minRadius=50, maxRadius=200)
+    if circles is not None:  # HoughCircles returns None when nothing is found
+        circles = np.uint16(np.around(circles))
+        for i in circles[0, :]:
+            cv2.circle(img, (i[0], i[1]), i[2], (0, 0, 255), 2)
+    return img
+
+
+def disk(radius, alias_blur=0.1, dtype=np.float32):
+ if radius <= 8:
+ L = np.arange(-8, 8 + 1)
+ ksize = (3, 3)
+ else:
+ L = np.arange(-radius, radius + 1)
+ ksize = (5, 5)
+ X, Y = np.meshgrid(L, L)
+ aliased_disk = np.array((X ** 2 + Y ** 2) <= radius ** 2, dtype=dtype)
+ aliased_disk /= np.sum(aliased_disk)
+
+ # supersample disk to antialias
+ return cv2.GaussianBlur(aliased_disk, ksize=ksize, sigmaX=alias_blur)
+
+
+def defocus_blur(image, level=(1, 0.1)):
+ c = level
+ kernel = disk(radius=c[0], alias_blur=c[1])
+
+ channels = []
+ for d in range(3):
+ channels.append(cv2.filter2D(image[:, :, d], -1, kernel))
+ channels = np.array(channels).transpose((1, 2, 0)) # 3x64x64 -> 64x64x3
+
+ return np.clip(channels, 0, 1)
+
+
+def masks_CFA_Bayer(shape):
+ pattern = "RGGB"
+ channels = dict((channel, np.zeros(shape)) for channel in "RGB")
+ for channel, (y, x) in zip(pattern, [(0, 0), (0, 1), (1, 0), (1, 1)]):
+ channels[channel][y::2, x::2] = 1
+ return tuple(channels[c].astype(bool) for c in "RGB")
+
+
+def cfa4_to_rgb(CFA4):
+ RGB = np.zeros((CFA4.shape[0]*2, CFA4.shape[1]*2, 3), dtype=np.uint8)
+ RGB[0::2, 0::2, 0] = CFA4[:, :, 0] # R
+ RGB[0::2, 1::2, 1] = CFA4[:, :, 1] # G on R row
+ RGB[1::2, 0::2, 1] = CFA4[:, :, 2] # G on B row
+ RGB[1::2, 1::2, 2] = CFA4[:, :, 3] # B
+
+ return RGB
+
+
+def mosaic_CFA_Bayer(RGB):
+ RGB = single2uint(RGB)
+ R_m, G_m, B_m = masks_CFA_Bayer(RGB.shape[0:2])
+ mask = np.concatenate(
+ (R_m[..., np.newaxis], G_m[..., np.newaxis], B_m[..., np.newaxis]), axis=-1
+ )
+ mosaic = np.multiply(mask, RGB) # mask*RGB
+ CFA = mosaic.sum(2).astype(np.uint8)
+
+ CFA4 = np.zeros((RGB.shape[0] // 2, RGB.shape[1] // 2, 4), dtype=np.uint8)
+ CFA4[:, :, 0] = CFA[0::2, 0::2]
+ CFA4[:, :, 1] = CFA[0::2, 1::2]
+ CFA4[:, :, 2] = CFA[1::2, 0::2]
+ CFA4[:, :, 3] = CFA[1::2, 1::2]
+
+ rgb = cfa4_to_rgb(CFA4)
+ rgb = uint2single(rgb)
+ return rgb
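+
+
+# Sketch (illustrative): `img` is a float RGB array in [0, 1]; the result keeps,
+# for every pixel, only the channel sampled by the RGGB Bayer pattern.
+#   mosaiced = mosaic_CFA_Bayer(img)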
+
+
+def simulate_barrel_distortion(image, k1=0.02, k2=0.01):
+ height, width = image.shape[:2]
+ mapx, mapy = np.meshgrid(np.arange(width), np.arange(height))
+ mapx = 2 * mapx / (width - 1) - 1
+ mapy = 2 * mapy / (height - 1) - 1
+ r = np.sqrt(mapx**2 + mapy**2)
+ mapx = mapx * (1 + k1 * r**2 + k2 * r**4)
+ mapy = mapy * (1 + k1 * r**2 + k2 * r**4)
+ mapx = (mapx + 1) * (width - 1) / 2
+ mapy = (mapy + 1) * (height - 1) / 2
+ distorted_image = cv2.remap(image, mapx.astype(np.float32), mapy.astype(np.float32), cv2.INTER_LINEAR)
+ return distorted_image
+
+
+def simulate_pincushion_distortion(image, k1=-0.02, k2=-0.01):
+ height, width = image.shape[:2]
+ mapx, mapy = np.meshgrid(np.arange(width), np.arange(height))
+ mapx = 2 * mapx / (width - 1) - 1
+ mapy = 2 * mapy / (height - 1) - 1
+ r = np.sqrt(mapx**2 + mapy**2)
+ mapx = mapx * (1 + k1 * r**2 + k2 * r**4)
+ mapy = mapy * (1 + k1 * r**2 + k2 * r**4)
+ mapx = (mapx + 1) * (width - 1) / 2
+ mapy = (mapy + 1) * (height - 1) / 2
+ distorted_image = cv2.remap(image, mapx.astype(np.float32), mapy.astype(np.float32), cv2.INTER_LINEAR)
+ return distorted_image
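+
+
+# The two distortion functions above share the same radial remapping and differ
+# only in the sign of their default coefficients: positive k1/k2 give barrel
+# distortion, negative k1/k2 give pincushion. Illustrative call:
+#   warped = simulate_barrel_distortion(img, k1=0.05, k2=0.02)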
+
+
+def rgb2gray(rgb):
+ return np.dot(rgb[..., :3], [0.2989, 0.5870, 0.1140])
+
+
+def spatter(x, severity=1):
+ c = [(0.65, 0.3, 4, 0.69, 0.6, 0),
+ (0.65, 0.3, 3, 0.68, 0.6, 0),
+ (0.65, 0.3, 2, 0.68, 0.5, 0),
+ (0.65, 0.3, 1, 0.65, 1.5, 1),
+ (0.67, 0.4, 1, 0.65, 1.5, 1)][severity - 1]
+ x_PIL = x
+ x = np.array(x, dtype=np.float32) / 255.
+
+ liquid_layer = np.random.normal(size=x.shape[:2], loc=c[0], scale=c[1])
+
+ liquid_layer = gaussian(liquid_layer, sigma=c[2])
+ liquid_layer[liquid_layer < c[3]] = 0
+ if c[5] == 0:
+ liquid_layer = (liquid_layer * 255).astype(np.uint8)
+ dist = 255 - cv2.Canny(liquid_layer, 50, 150)
+ dist = cv2.distanceTransform(dist, cv2.DIST_L2, 5)
+ _, dist = cv2.threshold(dist, 20, 20, cv2.THRESH_TRUNC)
+ dist = cv2.blur(dist, (3, 3)).astype(np.uint8)
+ dist = cv2.equalizeHist(dist)
+ ker = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]])
+ dist = cv2.filter2D(dist, cv2.CV_8U, ker)
+ dist = cv2.blur(dist, (3, 3)).astype(np.float32)
+
+ m = cv2.cvtColor(liquid_layer * dist, cv2.COLOR_GRAY2BGRA)
+ m /= np.max(m, axis=(0, 1))
+ m *= c[4]
+    # water is pale turquoise
+ color = np.concatenate((175 / 255. * np.ones_like(m[..., :1]),
+ 238 / 255. * np.ones_like(m[..., :1]),
+ 238 / 255. * np.ones_like(m[..., :1])), axis=2)
+
+ color = cv2.cvtColor(color, cv2.COLOR_BGR2BGRA)
+
+ if len(x.shape) < 3 or x.shape[2] < 3:
+ add_spatter_color = cv2.cvtColor(np.clip(m * color, 0, 1),
+ cv2.COLOR_BGRA2BGR)
+ add_spatter_gray = rgb2gray(add_spatter_color)
+
+ return (np.clip(x + add_spatter_gray, 0, 1) * 255).astype(np.uint8)
+
+ else:
+
+ x = cv2.cvtColor(x, cv2.COLOR_BGR2BGRA)
+
+ return (cv2.cvtColor(np.clip(x + m * color, 0, 1),
+ cv2.COLOR_BGRA2BGR) * 255).astype(np.uint8)
+ else:
+ m = np.where(liquid_layer > c[3], 1, 0)
+ m = gaussian(m.astype(np.float32), sigma=c[4])
+ m[m < 0.8] = 0
+
+ x_rgb = np.array(x_PIL)
+
+ # mud brown
+ color = np.concatenate((63 / 255. * np.ones_like(x_rgb[..., :1]),
+ 42 / 255. * np.ones_like(x_rgb[..., :1]),
+ 20 / 255. * np.ones_like(x_rgb[..., :1])),
+ axis=2)
+ color *= m[..., np.newaxis]
+ if len(x.shape) < 3 or x.shape[2] < 3:
+ x *= (1 - m)
+ return (np.clip(x + rgb2gray(color), 0, 1) * 255).astype(np.uint8)
+
+ else:
+ x *= (1 - m[..., np.newaxis])
+ return (np.clip(x + color, 0, 1) * 255).astype(np.uint8)
+
+
+# mod of https://gist.github.com/erniejunior/601cdf56d2b424757de5
+def elastic_transform(image, severity=3):
+ image = np.array(image, dtype=np.float32) / 255.
+ shape = image.shape
+ shape_size = shape[:2]
+
+ sigma = np.array(shape_size) * 0.01
+ alpha = [250 * 0.05, 250 * 0.065, 250 * 0.085, 250 * 0.1, 250 * 0.12][
+ severity - 1]
+ max_dx = shape[0] * 0.005
+ max_dy = shape[0] * 0.005
+
+ dx = (gaussian(np.random.uniform(-max_dx, max_dx, size=shape[:2]),
+ sigma, mode='reflect', truncate=3) * alpha).astype(
+ np.float32)
+ dy = (gaussian(np.random.uniform(-max_dy, max_dy, size=shape[:2]),
+ sigma, mode='reflect', truncate=3) * alpha).astype(
+ np.float32)
+
+ if len(image.shape) < 3 or image.shape[2] < 3:
+ x, y = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]))
+ indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1))
+ else:
+ dx, dy = dx[..., np.newaxis], dy[..., np.newaxis]
+ x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]),
+ np.arange(shape[2]))
+ indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx,
+ (-1, 1)), np.reshape(
+ z, (-1, 1))
+ return np.clip(
+ map_coordinates(image, indices, order=1, mode='reflect').reshape(
+ shape), 0, 1) * 255
+
+
+def frost(x, severity=2):
+ c = [(1, 0.4),
+ (0.8, 0.6),
+ (0.7, 0.7),
+ (0.65, 0.7),
+ (0.6, 0.75)][severity - 1]
+
+    idx = np.random.randint(6)  # six frost textures are shipped with the toolkit
+ filename = [os.path.join("degradation_toolkit/frost", 'frost1.png'),
+ os.path.join("degradation_toolkit/frost", 'frost2.png'),
+ os.path.join("degradation_toolkit/frost", 'frost3.png'),
+ os.path.join("degradation_toolkit/frost", 'frost4.jpg'),
+ os.path.join("degradation_toolkit/frost", 'frost5.jpg'),
+ os.path.join("degradation_toolkit/frost", 'frost6.jpg')][idx]
+ frost = Image.open(filename)
+ frost = frost.convert("RGB")
+ frost = np.array(frost)
+ frost = uint2single(frost)
+ frost_shape = frost.shape
+ x_shape = np.array(x).shape
+
+ # resize the frost image so it fits to the image dimensions
+ scaling_factor = 1
+ if frost_shape[0] >= x_shape[0] and frost_shape[1] >= x_shape[1]:
+ scaling_factor = 1
+ elif frost_shape[0] < x_shape[0] and frost_shape[1] >= x_shape[1]:
+ scaling_factor = x_shape[0] / frost_shape[0]
+ elif frost_shape[0] >= x_shape[0] and frost_shape[1] < x_shape[1]:
+ scaling_factor = x_shape[1] / frost_shape[1]
+ elif frost_shape[0] < x_shape[0] and frost_shape[1] < x_shape[
+ 1]: # If both dims are too small, pick the bigger scaling factor
+ scaling_factor_0 = x_shape[0] / frost_shape[0]
+ scaling_factor_1 = x_shape[1] / frost_shape[1]
+ scaling_factor = np.maximum(scaling_factor_0, scaling_factor_1)
+
+ scaling_factor *= 1.1
+ new_shape = (int(np.ceil(frost_shape[1] * scaling_factor)),
+ int(np.ceil(frost_shape[0] * scaling_factor)))
+ frost_rescaled = cv2.resize(frost, dsize=new_shape,
+ interpolation=cv2.INTER_CUBIC)
+
+ # randomly crop
+ x_start, y_start = np.random.randint(0, frost_rescaled.shape[0] - x_shape[
+ 0]), np.random.randint(0, frost_rescaled.shape[1] - x_shape[1])
+
+ if len(x_shape) < 3 or x_shape[2] < 3:
+ frost_rescaled = frost_rescaled[x_start:x_start + x_shape[0],
+ y_start:y_start + x_shape[1]]
+ frost_rescaled = rgb2gray(frost_rescaled)
+ else:
+ frost_rescaled = frost_rescaled[x_start:x_start + x_shape[0],
+ y_start:y_start + x_shape[1]][..., [2, 1, 0]]
+ return c[0] * np.array(x) + c[1] * frost_rescaled
diff --git a/degradation_toolkit/x_distortion/__init__.py b/degradation_toolkit/x_distortion/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd2be4b46ebd80a1208d641265a981b61fd3d9a4
--- /dev/null
+++ b/degradation_toolkit/x_distortion/__init__.py
@@ -0,0 +1,120 @@
+import numpy as np
+
+from .blur import *
+from .brightness import *
+from .quantization import *
+from .compression import *
+from .contrast import *
+from .noise import *
+from .oversharpen import *
+from .pixelate import *
+from .saturate import *
+
+
+def add_distortion(img, severity=1, distortion_name=None):
+    """This function returns a distorted version of the given image.
+
+    @param img (np.ndarray, uint8): Input image, H x W x 3, RGB, [0, 255]
+    @param severity: Severity of distortion, [1, 5]
+    @param distortion_name: Name of the distortion function to apply (see distortions_dict)
+    @return: Degraded image (np.ndarray, uint8), H x W x 3, RGB, [0, 255]
+    """
+
+ if not isinstance(img, np.ndarray):
+ raise AttributeError('Expecting type(img) to be numpy.ndarray')
+ if not (img.dtype.type is np.uint8):
+ raise AttributeError('Expecting img.dtype.type to be numpy.uint8')
+
+ if not (img.ndim in [2, 3]):
+ raise AttributeError('Expecting img.shape to be either (h x w) or (h x w x c)')
+ if img.ndim == 2:
+ img = np.stack((img,) * 3, axis=-1)
+
+ h, w, c = img.shape
+    if h < 32 or w < 32:
+        raise AttributeError('Both h and w must be at least 32 pixels')
+    if c not in [1, 3]:
+        raise AttributeError('Expecting img to have either 1 or 3 channels')
+ if c == 1:
+ img = np.stack((np.squeeze(img),) * 3, axis=-1)
+
+ if severity not in [1, 2, 3, 4, 5]:
+ raise AttributeError('The severity must be an integer in [1, 5]')
+
+ if distortion_name:
+ img_lq = globals()[distortion_name](img, severity)
+ else:
+ raise ValueError("The distortion_name must be passed")
+
+ return np.uint8(img_lq)
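+
+
+# Example (hypothetical usage; `img` is an H x W x 3 uint8 RGB array):
+#   img_lq = add_distortion(img, severity=3, distortion_name="noise_gaussian_RGB")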
+
+
+distortions_dict = {
+ "blur": [
+ "blur_gaussian",
+ "blur_motion",
+ "blur_glass",
+ "blur_lens",
+ "blur_zoom",
+ "blur_jitter",
+ ],
+ "noise": [
+ "noise_gaussian_RGB",
+ "noise_gaussian_YCrCb",
+ "noise_speckle",
+ "noise_spatially_correlated",
+ "noise_poisson",
+ "noise_impulse",
+ ],
+ "compression": [
+ "compression_jpeg",
+ "compression_jpeg_2000",
+ ],
+ "brighten": [
+ "brightness_brighten_shfit_HSV",
+ "brightness_brighten_shfit_RGB",
+ "brightness_brighten_gamma_HSV",
+ "brightness_brighten_gamma_RGB",
+ ],
+ "darken": [
+ "brightness_darken_shfit_HSV",
+ "brightness_darken_shfit_RGB",
+ "brightness_darken_gamma_HSV",
+ "brightness_darken_gamma_RGB",
+ ],
+ "contrast_strengthen": [
+ "contrast_strengthen_scale",
+ "contrast_strengthen_stretch",
+ ],
+ "contrast_weaken": [
+ "contrast_weaken_scale",
+ "contrast_weaken_stretch",
+ ],
+ "saturate_strengthen": [
+ "saturate_strengthen_HSV",
+ "saturate_strengthen_YCrCb",
+ ],
+ "saturate_weaken": [
+ "saturate_weaken_HSV",
+ "saturate_weaken_YCrCb",
+ ],
+ "oversharpen": [
+ "oversharpen",
+ ],
+ "pixelate": [
+ "pixelate",
+ ],
+ "quantization": [
+ "quantization_otsu",
+ "quantization_median",
+ "quantization_hist",
+ ],
+ "spatter": [
+ "spatter",
+ ],
+}
+
+
+def get_distortion_names(subset=None):
+ if subset in distortions_dict:
+ print(distortions_dict[subset])
+ else:
+ print(distortions_dict)
diff --git a/degradation_toolkit/x_distortion/blur.py b/degradation_toolkit/x_distortion/blur.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c42c0c743aac83abdf3267a75876c88ca4c5aee
--- /dev/null
+++ b/degradation_toolkit/x_distortion/blur.py
@@ -0,0 +1,155 @@
+import cv2
+import numpy as np
+
+from skimage.filters import gaussian
+from .helper import (
+ _motion_blur,
+ shuffle_pixels_njit,
+ clipped_zoom,
+ gen_disk,
+ gen_lensmask,
+)
+
+
+def blur_gaussian(img, severity=1):
+ """
+ Gaussian Blur.
+ severity=[1, 2, 3, 4, 5] corresponding to sigma=[1, 2, 3, 4, 5].
+    severity levels mainly follow KADID-10K and Imagecorruptions.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [1, 2, 3, 4, 5][severity - 1]
+ img = np.array(img) / 255.
+ img = gaussian(img, sigma=c, channel_axis=-1)
+ img = np.clip(img, 0, 1) * 255
+ return img.round().astype(np.uint8)
+
+
+def blur_gaussian_lensmask(img, severity=1):
+ """
+ Gaussian Blur with Lens Mask.
+ severity=[1, 2, 3, 4, 5] corresponding to
+ [gamma, sigma]=[[2.0, 2], [2.4, 4], [3.0, 6], [3.8, 8], [5.0, 10]].
+    severity levels mainly follow PieAPP.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [(2.0, 2), (2.4, 4), (3.0, 6), (3.8, 8), (5.0, 10)][severity - 1]
+ img_orig = np.array(img) / 255.
+ h, w = img.shape[:2]
+ mask = gen_lensmask(h, w, gamma=c[0])[:, :, None]
+ img = gaussian(img_orig, sigma=c[1], channel_axis=-1)
+ img = mask * img_orig + (1 - mask) * img
+ img = np.clip(img, 0, 1) * 255
+ return img.round().astype(np.uint8)
+
+
+def blur_motion(img, severity=1):
+ """
+ Motion Blur.
+    severity = [1, 2, 3, 4, 5] corresponding to
+    (radius, sigma) = [(5, 3), (10, 5), (15, 7), (15, 9), (20, 12)].
+    severity levels mainly follow Imagecorruptions.
+
+    @param img: Input image, H x W x 3, value range [0, 255]
+    @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [(5, 3), (10, 5), (15, 7), (15, 9), (20, 12)][severity - 1]
+ angle = np.random.uniform(-90, 90)
+ img = np.array(img)
+ img = _motion_blur(img, radius=c[0], sigma=c[1], angle=angle)
+ img = np.clip(img, 0, 255)
+ return img.round().astype(np.uint8)
+
+
+def blur_glass(img, severity=1):
+ """
+ Glass Blur.
+ severity = [1, 2, 3, 4, 5] corresponding to
+ [sigma, shift, iteration]=[(0.7, 1, 1), (0.9, 2, 1), (1.2, 2, 2), (1.4, 3, 2), (1.6, 4, 2)].
+    severity levels mainly follow Imagecorruptions.
+
+    @param img: Input image, H x W x 3, value range [0, 255]
+    @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [(0.7, 1, 1), (0.9, 2, 1), (1.2, 2, 2), (1.4, 3, 2), (1.6, 4, 2)][severity - 1]
+ img = np.array(img) / 255.
+ img = gaussian(img, sigma=c[0], channel_axis=-1)
+ img = shuffle_pixels_njit(img, shift=c[1], iteration=c[2])
+ img = np.clip(gaussian(img, sigma=c[0], channel_axis=-1), 0, 1) * 255
+ return img.round().astype(np.uint8)
+
+
+def blur_lens(img, severity=1):
+ """
+ Lens Blur.
+ severity = [1, 2, 3, 4, 5] corresponding to radius=[2, 3, 4, 6, 8].
+    severity levels mainly follow KADID-10K.
+
+    @param img: Input image, H x W x 3, value range [0, 255]
+    @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [2, 3, 4, 6, 8][severity - 1]
+ img = np.array(img) / 255.
+ kernel = gen_disk(radius=c)
+ img_lq = []
+ for i in range(3):
+ img_lq.append(cv2.filter2D(img[:, :, i], -1, kernel))
+ img_lq = np.array(img_lq).transpose((1, 2, 0))
+ img_lq = np.clip(img_lq, 0, 1) * 255
+ return img_lq.round().astype(np.uint8)
+
+
+def blur_zoom(img, severity=1):
+ """
+ Zoom Blur.
+    severity = [1, 2, 3, 4, 5] corresponding to zoom_factor=
+    [np.arange(1, 1.03, 0.02),
+     np.arange(1, 1.06, 0.02),
+     np.arange(1, 1.10, 0.02),
+     np.arange(1, 1.15, 0.02),
+     np.arange(1, 1.21, 0.02)].
+    severity levels mainly follow Imagecorruptions.
+
+    @param img: Input image, H x W x 3, value range [0, 255]
+    @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [np.arange(1, 1.03, 0.02),
+ np.arange(1, 1.06, 0.02),
+ np.arange(1, 1.10, 0.02),
+ np.arange(1, 1.15, 0.02),
+ np.arange(1, 1.21, 0.02)][severity - 1]
+ img = (np.array(img) / 255.).astype(np.float32)
+ h, w = img.shape[:2]
+ img_lq = np.zeros_like(img)
+ for zoom_factor in c:
+ zoom_layer = clipped_zoom(img, zoom_factor)
+ img_lq += zoom_layer[:h, :w, :]
+ img_lq = (img + img_lq) / (len(c) + 1)
+ img_lq = np.clip(img_lq, 0, 1) * 255
+ return img_lq.round().astype(np.uint8)
+
+
+def blur_jitter(img, severity=1):
+ """
+ Jitter Blur.
+ severity = [1, 2, 3, 4, 5] corresponding to shift=[1, 2, 3, 4, 5].
+    severity levels mainly follow KADID-10K.
+
+    @param img: Input image, H x W x 3, value range [0, 255]
+    @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [1, 2, 3, 4, 5][severity - 1]
+ img = np.array(img)
+ img_lq = shuffle_pixels_njit(img, shift=c, iteration=1)
+ return np.uint8(img_lq)
diff --git a/degradation_toolkit/x_distortion/brightness.py b/degradation_toolkit/x_distortion/brightness.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea64e820ff5a7db695d3b0178913e40225898d2b
--- /dev/null
+++ b/degradation_toolkit/x_distortion/brightness.py
@@ -0,0 +1,150 @@
+import numpy as np
+import cv2
+from .helper import gen_lensmask
+
+
+def brightness_brighten_shfit_HSV(img, severity=1):
+ """
+    The RGB image is mapped to HSV, and the brightness is enhanced via the V channel.
+    severity=[1,2,3,4,5] corresponds to c=[0.1, 0.2, 0.3, 0.4, 0.5]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ c = [0.1, 0.2, 0.3, 0.4, 0.5][severity-1]
+ img = np.float32(np.array(img) / 255.)
+ img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
+ img_hsv[:, :, 2] += c
+ img_lq = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB)
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def brightness_brighten_shfit_RGB(img, severity=1):
+ """
+    The RGB image is directly brightened by an additive RGB shift.
+    severity=[1,2,3,4,5] corresponds to c=[0.1, 0.15, 0.2, 0.27, 0.35]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ c = [0.1, 0.15, 0.2, 0.27, 0.35][severity-1]
+ img = np.float32(np.array(img) / 255.)
+ img_lq = img + c
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def brightness_brighten_gamma_RGB(img, severity=1):
+ """
+    The RGB image is directly brightened by a gamma mapping on all channels.
+    severity=[1,2,3,4,5] corresponds to gamma=[0.8, 0.7, 0.6, 0.45, 0.3]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ gamma = [0.8, 0.7, 0.6, 0.45, 0.3][severity-1]
+ img = np.array(img / 255.)
+ img_lq = img ** gamma
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def brightness_brighten_gamma_HSV(img, severity=1):
+ """
+    The V channel in HSV space is brightened by a gamma mapping.
+    severity=[1,2,3,4,5] corresponds to gamma=[0.7, 0.58, 0.47, 0.36, 0.25]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ gamma = [0.7, 0.58, 0.47, 0.36, 0.25][severity-1]
+ img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
+ img_hsv = np.array(img_hsv / 255.)
+ img_hsv[:, :, 2] = img_hsv[:, :, 2] ** gamma
+ img_lq = np.uint8(np.clip(img_hsv, 0, 1) * 255.)
+ img_lq = cv2.cvtColor(img_lq, cv2.COLOR_HSV2RGB)
+ return img_lq
+
+
+def brightness_darken_shfit_HSV(img, severity=1):
+ """
+    The RGB image is mapped to HSV, and the brightness is reduced via the V channel.
+    severity=[1,2,3,4,5] corresponds to c=[0.1, 0.2, 0.3, 0.4, 0.5]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ c = [0.1, 0.2, 0.3, 0.4, 0.5][severity-1]
+ img = np.float32(np.array(img) / 255.)
+ img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
+ img_hsv[:, :, 2] -= c
+ img_lq = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB)
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def brightness_darken_shfit_RGB(img, severity=1):
+ """
+    The RGB image is directly darkened by a subtractive RGB shift.
+    severity=[1,2,3,4,5] corresponds to c=[0.1, 0.15, 0.2, 0.27, 0.35]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ c = [0.1, 0.15, 0.2, 0.27, 0.35][severity-1]
+ img = np.float32(np.array(img)/255.)
+ img_lq = img - c
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def brightness_darken_gamma_RGB(img, severity=1):
+ """
+    The RGB image is directly darkened by a gamma mapping on all channels.
+    severity=[1,2,3,4,5] corresponds to gamma=[1.4, 1.7, 2.1, 2.6, 3.2]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ gamma = [1.4, 1.7, 2.1, 2.6, 3.2][severity-1]
+ img = np.array(img / 255.)
+ img_lq = img ** gamma
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def brightness_darken_gamma_HSV(img, severity=1):
+ """
+    The V channel in HSV space is darkened by a gamma mapping.
+    severity=[1,2,3,4,5] corresponds to gamma=[1.5, 1.8, 2.2, 2.7, 3.5]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ gamma = [1.5, 1.8, 2.2, 2.7, 3.5][severity-1]
+ img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
+ img_hsv = np.array(img_hsv / 255.)
+ img_hsv[:, :, 2] = img_hsv[:, :, 2] ** gamma
+ img_lq = np.uint8(np.clip(img_hsv, 0, 1) * 255.)
+ img_lq = cv2.cvtColor(img_lq, cv2.COLOR_HSV2RGB)
+ return img_lq
+
+
+def brightness_vignette(img, severity=1):
+ """
+    A vignette effect is applied to the RGB image.
+    severity=[1,2,3,4,5] corresponds to gamma=[0.5, 0.875, 1.25, 1.625, 2]
+
+ @param img: Input image, H x W x RGB, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x RGB, value range [0, 255]
+ """
+ gamma = [0.5, 0.875, 1.25, 1.625, 2][severity - 1]
+ img = np.array(img)
+ h, w = img.shape[:2]
+ mask = gen_lensmask(h, w, gamma=gamma)[:, :, None]
+ img_lq = mask * img
+ return np.uint8(np.clip(img_lq, 0, 255))
diff --git a/degradation_toolkit/x_distortion/compression.py b/degradation_toolkit/x_distortion/compression.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6934ee3d7d3a0b1a58554f31d53ee1fb499d4cb
--- /dev/null
+++ b/degradation_toolkit/x_distortion/compression.py
@@ -0,0 +1,78 @@
+import numpy as np
+from PIL import Image
+from io import BytesIO
+
+
+def compression_jpeg(img, severity=1):
+ """
+ JPEG compression on a NumPy array.
+    severity=[1,2,3,4,5] corresponding to quality=[25,18,12,8,5].
+ from https://github.com/bethgelab/imagecorruptions/blob/master/imagecorruptions/corruptions.py
+
+ @param img: Input image as NumPy array, H x W x C, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image as NumPy array, H x W x C, value range [0, 255]
+ """
+ assert img.dtype == np.uint8, "Image array should have dtype of np.uint8"
+ assert severity in [1, 2, 3, 4, 5], 'Severity must be an integer between 1 and 5.'
+
+ quality = [25, 18, 12, 8, 5][severity - 1]
+ output = BytesIO()
+ gray_scale = False
+    if img.shape[2] == 1:  # Check if the image is grayscale
+        gray_scale = True
+        img = np.squeeze(img, axis=2)  # PIL cannot build an image from (H, W, 1)
+    # Convert NumPy array to PIL Image
+    img = Image.fromarray(img)
+    if gray_scale:
+        img = img.convert('L')
+    else:
+        img = img.convert('RGB')
+ # Save image to a bytes buffer using JPEG compression
+ img.save(output, 'JPEG', quality=quality)
+ output.seek(0)
+ # Load the compressed image from the bytes buffer
+ img_lq = Image.open(output)
+ # Convert PIL Image back to NumPy array
+ if gray_scale:
+ img_lq = np.array(img_lq.convert('L'))
+ img_lq = img_lq.reshape((img_lq.shape[0], img_lq.shape[1], 1)) # Maintaining the original shape (H, W, 1)
+ else:
+ img_lq = np.array(img_lq.convert('RGB'))
+ return img_lq
+
+
+def compression_jpeg_2000(img, severity=1):
+ """
+ JPEG2000 compression on a NumPy array.
+ severity=[1,2,3,4,5] corresponding to quality=[29,27.5,26,24.5,23], quality_mode='dB'.
+
+    @param img: Input image as NumPy array, H x W x C, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image as NumPy array, H x W x C, value range [0, 255]
+ """
+ assert img.dtype == np.uint8, "Image array should have dtype of np.uint8"
+ assert severity in [1, 2, 3, 4, 5], 'Severity must be an integer between 1 and 5.'
+
+ quality = [29, 27.5, 26, 24.5, 23][severity - 1]
+ output = BytesIO()
+ gray_scale = False
+    if img.shape[2] == 1:  # Check if the image is grayscale
+        gray_scale = True
+        img = np.squeeze(img, axis=2)  # PIL cannot build an image from (H, W, 1)
+    # Convert NumPy array to PIL Image
+    img = Image.fromarray(img)
+    if gray_scale:
+        img = img.convert('L')
+    else:
+        img = img.convert('RGB')
+    # Save image to a bytes buffer using JPEG 2000 compression
+ img.save(output, 'JPEG2000', quality_mode='dB', quality_layers=[quality])
+ output.seek(0)
+ # Load the compressed image from the bytes buffer
+ img_lq = Image.open(output)
+ # Convert PIL Image back to NumPy array
+ if gray_scale:
+ img_lq = np.array(img_lq.convert('L'))
+ img_lq = img_lq.reshape((img_lq.shape[0], img_lq.shape[1], 1)) # Maintaining the original shape (H, W, 1)
+ else:
+ img_lq = np.array(img_lq.convert('RGB'))
+ return img_lq
diff --git a/degradation_toolkit/x_distortion/contrast.py b/degradation_toolkit/x_distortion/contrast.py
new file mode 100644
index 0000000000000000000000000000000000000000..707c1e65ce44ea5291a7525b1c16f196d100d6c3
--- /dev/null
+++ b/degradation_toolkit/x_distortion/contrast.py
@@ -0,0 +1,74 @@
+import cv2
+import numpy as np
+from PIL import Image
+from PIL import ImageEnhance
+
+
+def contrast_weaken_scale(img, severity=1):
+ """
+ Contrast Weaken by scaling.
+ severity=[1, 2, 3, 4, 5] corresponding to scale=[0.75, 0.6, 0.45, 0.3, 0.2].
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [0.75, 0.6, 0.45, 0.3, 0.2][severity - 1]
+ img = Image.fromarray(img)
+ enhancer = ImageEnhance.Contrast(img)
+ img = enhancer.enhance(c)
+ img = np.uint8(np.clip(np.array(img), 0, 255))
+ return img
+
+
+def contrast_weaken_stretch(img, severity=1):
+ """
+ Contrast Weaken by stretching.
+ severity=[1, 2, 3, 4, 5] corresponding to scale=[1.0, 0.9, 0.8, 0.6, 0.4].
+    severity levels mainly follow PieAPP.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [1.0, 0.9, 0.8, 0.6, 0.4][severity - 1]
+ img = np.array(img) / 255.
+ img_mean = np.mean(img, axis=(0,1), keepdims=True)
+ img = 1. / (1 + (img_mean / (img + 1e-12)) ** c)
+ img = np.uint8(np.clip(img, 0, 1) * 255)
+ return img
+
+
+def contrast_strengthen_scale(img, severity=1):
+ """
+ Contrast Strengthen by scaling.
+ severity=[1, 2, 3, 4, 5] corresponding to scale=[1.4, 1.7, 2.1, 2.6, 4.0].
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [1.4, 1.7, 2.1, 2.6, 4.0][severity - 1]
+ img = Image.fromarray(img)
+ enhancer = ImageEnhance.Contrast(img)
+ img = enhancer.enhance(c)
+ img = np.uint8(np.clip(np.array(img), 0, 255))
+ return img
+
+
+def contrast_strengthen_stretch(img, severity=1):
+ """
+ Contrast Strengthen by stretching.
+ severity=[1, 2, 3, 4, 5] corresponding to scale=[2.0, 4.0, 6.0, 8.0, 10.0].
+    severity levels mainly follow PieAPP.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [2.0, 4.0, 6.0, 8.0, 10.0][severity - 1]
+ img = np.array(img) / 255.
+ img_mean = np.mean(img, axis=(0,1), keepdims=True)
+ img = 1. / (1 + (img_mean / (img + 1e-12)) ** c)
+ img = np.uint8(np.clip(img, 0, 1) * 255)
+ return img
diff --git a/degradation_toolkit/x_distortion/helper.py b/degradation_toolkit/x_distortion/helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..176674063bbc519e075a71828b7602164a3df68d
--- /dev/null
+++ b/degradation_toolkit/x_distortion/helper.py
@@ -0,0 +1,171 @@
+import cv2
+from scipy.ndimage import zoom as scizoom
+from numba import njit, prange
+import numpy as np
+import math
+
+
+def gen_lensmask(h, w, gamma):
+ """
+ Generate lens mask with shape (h, w).
+ For point (i, j),
+        distance = sqrt((i - h // 2)^2 + (j - w // 2)^2) / sqrt((h^2 + w^2) / 4)
+        mask = (1 - distance) ^ gamma
+
+ @param h: height
+ @param w: width
+ @param gamma: exponential factor
+ @return: Mask, H x W
+ """
+ dist1 = np.array([list(range(w))] * h) - w // 2
+ dist2 = np.array([list(range(h))] * w) - h // 2
+ dist2 = np.transpose(dist2, (1, 0))
+ dist = np.sqrt((dist1 ** 2 + dist2 ** 2)) / np.sqrt((w ** 2 + h ** 2) / 4)
+ mask = (1 - dist) ** gamma
+ return mask
+
+
+def gen_disk(radius, dtype=np.float32):
+ if radius <= 8:
+ L = np.arange(-8, 8 + 1)
+ else:
+ L = np.arange(-radius, radius + 1)
+ X, Y = np.meshgrid(L, L)
+ disk = np.array((X ** 2 + Y ** 2) <= radius ** 2, dtype=dtype)
+ disk /= np.sum(disk)
+ return disk
+
+
+# modification of https://github.com/FLHerne/mapgen/blob/master/diamondsquare.py
+def plasma_fractal(mapsize=256, wibbledecay=3):
+ """
+ Generate a heightmap using diamond-square algorithm.
+ Return square 2d array, side length 'mapsize', of floats in range 0-255.
+ 'mapsize' must be a power of two.
+ """
+ assert (mapsize & (mapsize - 1) == 0)
+    maparray = np.empty((mapsize, mapsize), dtype=np.float64)  # np.float_ was removed in NumPy 2.0
+ maparray[0, 0] = 0
+ stepsize = mapsize
+ wibble = 100
+
+ def wibbledmean(array):
+ return array / 4 + wibble * np.random.uniform(-wibble, wibble,
+ array.shape)
+
+ def fillsquares():
+ """For each square of points stepsize apart,
+ calculate middle value as mean of points + wibble"""
+ cornerref = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
+ squareaccum = cornerref + np.roll(cornerref, shift=-1, axis=0)
+ squareaccum += np.roll(squareaccum, shift=-1, axis=1)
+ maparray[stepsize // 2:mapsize:stepsize,
+ stepsize // 2:mapsize:stepsize] = wibbledmean(squareaccum)
+
+ def filldiamonds():
+ """For each diamond of points stepsize apart,
+ calculate middle value as mean of points + wibble"""
+ mapsize = maparray.shape[0]
+ drgrid = maparray[stepsize // 2:mapsize:stepsize,
+ stepsize // 2:mapsize:stepsize]
+ ulgrid = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
+ ldrsum = drgrid + np.roll(drgrid, 1, axis=0)
+ lulsum = ulgrid + np.roll(ulgrid, -1, axis=1)
+ ltsum = ldrsum + lulsum
+ maparray[0:mapsize:stepsize,
+ stepsize // 2:mapsize:stepsize] = wibbledmean(ltsum)
+ tdrsum = drgrid + np.roll(drgrid, 1, axis=1)
+ tulsum = ulgrid + np.roll(ulgrid, -1, axis=0)
+ ttsum = tdrsum + tulsum
+ maparray[stepsize // 2:mapsize:stepsize,
+ 0:mapsize:stepsize] = wibbledmean(ttsum)
+
+ while stepsize >= 2:
+ fillsquares()
+ filldiamonds()
+ stepsize //= 2
+ wibble /= wibbledecay
+
+ maparray -= maparray.min()
+ return maparray / maparray.max()
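+
+# Illustrative usage (assumed): plasma_fractal(mapsize=256, wibbledecay=3)
+# yields a smooth random heightmap in [0, 1] that can be blended with an image
+# as a fog/cloud mask; mapsize must be a power of two.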
+
+
+def clipped_zoom(img, zoom_factor):
+    # clipping along the height dimension (axis 0):
+ ch0 = int(np.ceil(img.shape[0] / float(zoom_factor)))
+ top0 = (img.shape[0] - ch0) // 2
+
+    # clipping along the width dimension (axis 1):
+ ch1 = int(np.ceil(img.shape[1] / float(zoom_factor)))
+ top1 = (img.shape[1] - ch1) // 2
+
+ img = scizoom(img[top0:top0 + ch0, top1:top1 + ch1],
+ (zoom_factor, zoom_factor, 1), order=1)
+
+ return img
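+
+# Shape intuition (illustrative): for a 224 x 224 x 3 input and zoom_factor=1.5,
+# a central ceil(224 / 1.5) = 150 px crop is taken and zoomed back up, so the
+# output is again approximately 224 x 224 x 3.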
+
+
+def getOptimalKernelWidth1D(radius, sigma):
+    # sigma is unused here; the kernel width depends only on the radius.
+    return radius * 2 + 1
+
+
+def gauss_function(x, mean, sigma):
+ return (np.exp(- (x - mean)**2 / (2 * (sigma**2)))) / (np.sqrt(2 * np.pi) * sigma)
+
+
+def getMotionBlurKernel(width, sigma):
+ k = gauss_function(np.arange(width), 0, sigma)
+ Z = np.sum(k)
+ return k/Z
+
+
+def shift(image, dx, dy):
+ if(dx < 0):
+ shifted = np.roll(image, shift=image.shape[1]+dx, axis=1)
+ shifted[:,dx:] = shifted[:,dx-1:dx]
+ elif(dx > 0):
+ shifted = np.roll(image, shift=dx, axis=1)
+ shifted[:,:dx] = shifted[:,dx:dx+1]
+ else:
+ shifted = image
+
+ if(dy < 0):
+ shifted = np.roll(shifted, shift=image.shape[0]+dy, axis=0)
+ shifted[dy:,:] = shifted[dy-1:dy,:]
+ elif(dy > 0):
+ shifted = np.roll(shifted, shift=dy, axis=0)
+ shifted[:dy,:] = shifted[dy:dy+1,:]
+ return shifted
+
+
+def _motion_blur(x, radius, sigma, angle):
+ width = getOptimalKernelWidth1D(radius, sigma)
+ kernel = getMotionBlurKernel(width, sigma)
+ point = (width * np.sin(np.deg2rad(angle)), width * np.cos(np.deg2rad(angle)))
+ hypot = math.hypot(point[0], point[1])
+
+ blurred = np.zeros_like(x, dtype=np.float32)
+ for i in range(width):
+ dy = -math.ceil(((i*point[0]) / hypot) - 0.5)
+ dx = -math.ceil(((i*point[1]) / hypot) - 0.5)
+ if (np.abs(dy) >= x.shape[0] or np.abs(dx) >= x.shape[1]):
+ # simulated motion exceeded image borders
+ break
+ shifted = shift(x, dx, dy)
+ blurred = blurred + kernel[i] * shifted
+ return blurred
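+
+# Illustrative usage (assumed parameters): width = 2 * radius + 1 Gaussian-weighted
+# taps are accumulated along the given angle (in degrees), e.g. a horizontal streak:
+#     blurred = _motion_blur(img.astype(np.float32), radius=4, sigma=3, angle=0)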
+
+
+# Numba nopython compilation of shuffle_pixels
+@njit()
+def shuffle_pixels_njit(img, shift, iteration):
+ height, width = img.shape[:2]
+ # locally shuffle pixels
+ for _ in range(iteration):
+ for h in range(height - shift, shift, -1):
+ for w in range(width - shift, shift, -1):
+ dx, dy = np.random.randint(-shift, shift, size=(2,))
+ h_prime, w_prime = h + dy, w + dx
+ # swap
+ img[h, w], img[h_prime, w_prime] = img[h_prime, w_prime], img[h, w]
+ return img
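+
+# Illustrative usage (assumed parameters): swaps each pixel with a random
+# neighbour at most `shift` pixels away, repeated `iteration` times, e.g.
+#     scrambled = shuffle_pixels_njit(img.copy(), 4, 1)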
diff --git a/degradation_toolkit/x_distortion/noise.py b/degradation_toolkit/x_distortion/noise.py
new file mode 100644
index 0000000000000000000000000000000000000000..8661fc6b38c6899d26a0a17574461b6c0136df4e
--- /dev/null
+++ b/degradation_toolkit/x_distortion/noise.py
@@ -0,0 +1,117 @@
+import cv2
+import numpy as np
+import skimage as sk
+
+
+def noise_gaussian_RGB(img, severity=1):
+ """
+ Additive Gaussian noise in RGB channels.
+    severity=[1, 2, 3, 4, 5] corresponds to sigma=[0.05, 0.1, 0.15, 0.2, 0.25].
+    Severity levels mainly follow KADID-10K and Imagecorruptions.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ sigma = [0.05, 0.1, 0.15, 0.2, 0.25][severity-1]
+ img = np.array(img) / 255.
+ noise = np.random.normal(0, sigma, img.shape)
+ img_lq = img + noise
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
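+
+# Scale intuition (illustrative): sigma is defined on the [0, 1] range, so
+# sigma=0.05 corresponds to a standard deviation of about 12.75 gray levels
+# on the [0, 255] scale.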
+
+
+def noise_gaussian_YCrCb(img, severity=1):
+ """
+ Additive Gaussian noise with higher noise in color channels.
+    severity=[1, 2, 3, 4, 5] corresponds to
+ sigma_l=[0.05, 0.06, 0.07, 0.08, 0.09],
+ sigma_r=[1, 1.45, 1.9, 2.35, 2.8],
+ sigma_b=[1, 1.45, 1.9, 2.35, 2.8].
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ sigma_l = [0.05, 0.06, 0.07, 0.08, 0.09][severity-1]
+ sigma_r = sigma_l * [1, 1.45, 1.9, 2.35, 2.8][severity - 1]
+ sigma_b = sigma_l * [1, 1.45, 1.9, 2.35, 2.8][severity - 1]
+ h, w = img.shape[:2]
+ img = np.float32(np.array(img) / 255.)
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2YCR_CB)
+ noise_l = np.expand_dims(np.random.normal(0, sigma_l, (h, w)), 2)
+ noise_r = np.expand_dims(np.random.normal(0, sigma_r, (h, w)), 2)
+ noise_b = np.expand_dims(np.random.normal(0, sigma_b, (h, w)), 2)
+ noise = np.concatenate((noise_l, noise_r, noise_b), axis=2)
+ img_lq = np.float32(img + noise)
+ img_lq = cv2.cvtColor(img_lq, cv2.COLOR_YCR_CB2RGB)
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def noise_speckle(img, severity=1):
+ """
+ Multiplicative Gaussian noise.
+    severity=[1, 2, 3, 4, 5] corresponds to sigma=[0.14, 0.21, 0.28, 0.35, 0.42].
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [0.14, 0.21, 0.28, 0.35, 0.42][severity - 1]
+ img = np.array(img) / 255.
+ noise = img * np.random.normal(size=img.shape, scale=c)
+ img_lq = img + noise
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def noise_spatially_correlated(img, severity=1):
+ """
+ Spatially correlated noise.
+    severity=[1, 2, 3, 4, 5] corresponds to sigma=[0.08, 0.11, 0.14, 0.18, 0.22].
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ sigma = [0.08, 0.11, 0.14, 0.18, 0.22][severity - 1]
+ img = np.array(img) / 255.
+ noise = np.random.normal(0, sigma, img.shape)
+ img_lq = img + noise
+ img_lq = cv2.blur(img_lq, [3, 3])
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
+
+
+def noise_poisson(img, severity=1):
+ """
+ Poisson noise.
+ PieAPP keeps this distortion free of additional parameters.
+ The default:
+ c = vals = len(np.unique(image))
+ vals = 2 ** np.ceil(np.log2(vals))
+    But Imagecorruptions introduces an extra parameter c,
+    ranging over [60, 25, 12, 5, 3], so that sigma = sqrt(I / c).
+    severity=[1, 2, 3, 4, 5] corresponds to c=[80, 60, 40, 25, 15].
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [80, 60, 40, 25, 15][severity - 1]
+ img = np.array(img) / 255.
+ img_lq = np.random.poisson(img * c) / float(c)
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
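+
+# Worked example (illustrative): with severity=3 (c=40), a mid-gray pixel
+# I=0.5 has lambda = I * c = 20, giving a noise standard deviation of
+# sqrt(20) / 40 ~= 0.11 on the [0, 1] scale, i.e. smaller c means stronger noise.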
+
+
+def noise_impulse(img, severity=1):
+ """
+    Impulse noise, also known as salt & pepper noise.
+    PieAPP introduces the range [1e-4, 0.045].
+    severity=[1, 2, 3, 4, 5] corresponds to amount=[0.01, 0.03, 0.05, 0.07, 0.10].
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [0.01, 0.03, 0.05, 0.07, 0.10][severity - 1]
+ img = np.array(img) / 255.
+ img_lq = sk.util.random_noise(img, mode='s&p', amount=c)
+ return np.uint8(np.clip(img_lq, 0, 1) * 255.)
diff --git a/degradation_toolkit/x_distortion/oversharpen.py b/degradation_toolkit/x_distortion/oversharpen.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8326a7e42292a1551493ae28fc2bb2ac9810857
--- /dev/null
+++ b/degradation_toolkit/x_distortion/oversharpen.py
@@ -0,0 +1,31 @@
+import cv2
+import numpy as np
+
+
+def oversharpen(img, severity=1):
+ """
+    Over-sharpening (unsharp masking) on a NumPy array.
+    severity=[1, 2, 3, 4, 5] corresponds to amount=[2, 2.8, 4, 6, 8].
+
+    @param img: Input image as NumPy array, H x W x C, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image as NumPy array, H x W x C, value range [0, 255]
+ """
+ assert img.dtype == np.uint8, "Image array should have dtype of np.uint8"
+ assert severity in [1, 2, 3, 4, 5], 'Severity must be an integer between 1 and 5.'
+
+ amount = [2, 2.8, 4, 6, 8][severity - 1]
+
+ # Setting the kernel size and sigmaX value for Gaussian blur
+ # In OpenCV's Size(kernel_width, kernel_height), both kernel_width and kernel_height
+ # should be odd numbers; for example, we can use (2*radius+1, 2*radius+1)
+ blur_radius = 2 # The radius is the blur radius used to set the size of the Gaussian kernel
+ sigmaX = 0
+
+ # Create a blurred/smoothed version of the image
+ blurred = cv2.GaussianBlur(img, (2*blur_radius+1, 2*blur_radius+1), sigmaX)
+
+ # Compute the sharpened image with an enhancement factor of 'amount'
+ sharpened = cv2.addWeighted(img, 1 + amount, blurred, -amount, 0)
+
+ return sharpened
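+
+
+# Identity behind addWeighted (illustrative): the call computes
+#     sharpened = (1 + amount) * img - amount * blurred
+#               = img + amount * (img - blurred)
+# i.e. classic unsharp masking with the Gaussian blur as the low-pass estimate.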
diff --git a/degradation_toolkit/x_distortion/pixelate.py b/degradation_toolkit/x_distortion/pixelate.py
new file mode 100644
index 0000000000000000000000000000000000000000..b495e3aa479cf421483601f61d8d00f8ada6e737
--- /dev/null
+++ b/degradation_toolkit/x_distortion/pixelate.py
@@ -0,0 +1,21 @@
+import numpy as np
+
+from PIL import Image
+
+
+def pixelate(img, severity=1):
+ """
+ Pixelate.
+    severity=[1, 2, 3, 4, 5] corresponds to scale=[0.5, 0.4, 0.3, 0.25, 0.2].
+    Severity levels mainly follow Imagecorruptions.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [0.5, 0.4, 0.3, 0.25, 0.2][severity - 1]
+ h, w = np.array(img).shape[:2]
+ img = Image.fromarray(img)
+ img = img.resize((int(w * c), int(h * c)), Image.BOX)
+ img = img.resize((w, h), Image.NEAREST)
+ return np.array(img).astype(np.uint8)
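+
+
+# Worked example (illustrative): a 512 x 512 input at severity=1 (c=0.5) is
+# box-downsampled to 256 x 256 and nearest-upsampled back, leaving 2 x 2 blocks.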
diff --git a/degradation_toolkit/x_distortion/quantization.py b/degradation_toolkit/x_distortion/quantization.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3b59ed2092de03473e5f008fe758f82aa1faa0b
--- /dev/null
+++ b/degradation_toolkit/x_distortion/quantization.py
@@ -0,0 +1,68 @@
+import numpy as np
+
+from PIL import Image
+from skimage.filters import threshold_multiotsu
+
+
+def quantization_otsu(img, severity=1):
+ """
+    Color quantization using Otsu's multi-threshold method.
+    severity=[1, 2, 3, 4, 5] corresponds to num_classes=[15, 11, 8, 5, 3].
+    Severity levels mainly follow KADID-10K and Imagecorruptions.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [15, 11, 8, 5, 3][severity - 1]
+ img = np.array(img).astype(np.float32)
+ for i in range(img.shape[2]):
+ img_gray = img[:, :, i]
+        thresholds = threshold_multiotsu(img_gray, classes=c, nbins=30)  # reduced nbins; large class counts may require a modified skimage
+ v_max = img_gray.max()
+ v_min = img_gray.min()
+ img[:, :, i] = np.digitize(img[:, :, i], bins=thresholds) * (v_max - v_min) / c + v_min
+ img = np.clip(img, 0, 255)
+ return img
+
+
+def quantization_median(img, severity=1):
+ """
+    Color quantization using median-cut over the histogram.
+    severity=[1, 2, 3, 4, 5] corresponds to num_classes=[20, 15, 10, 6, 3].
+    Severity levels mainly follow KADID-10K and Imagecorruptions.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [20, 15, 10, 6, 3][severity - 1]
+ for i in range(img.shape[2]):
+ img_gray = Image.fromarray(img[:, :, i])
+ img_gray = img_gray.quantize(colors=c, method=Image.Quantize.MEDIANCUT).convert("L")
+ img[:, :, i] = np.array(img_gray)
+ img = np.clip(img, 0, 255)
+ return img
+
+
+def quantization_hist(img, severity=1):
+ """
+    Color quantization using histogram equalization.
+    severity=[1, 2, 3, 4, 5] corresponds to num_classes=[24, 16, 8, 6, 4].
+    Severity levels mainly follow KADID-10K and Imagecorruptions.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [24, 16, 8, 6, 4][severity - 1]
+ hist, _ = np.histogram(img.flatten(), bins=c, range=[0, 255])
+ cdf = hist.cumsum()
+ cdf_m = np.ma.masked_equal(cdf, 0)
+ cdf_m = (cdf_m - cdf_m.min()) * 255 / (cdf_m.max() - cdf_m.min())
+ cdf = np.ma.filled(cdf_m, 0).astype('uint8')
+ img = np.uint8(np.round(img / 255 * (c - 1)))
+ img = cdf[img]
+ img = np.clip(img, 0, 255)
+ return img
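+
+
+if __name__ == "__main__":
+    # Minimal smoke test (illustrative only): a smooth gradient collapses to at
+    # most c distinct levels after histogram quantization.
+    ramp = np.tile(np.arange(256, dtype=np.uint8), (16, 1))
+    ramp = np.stack([ramp] * 3, axis=2)
+    print(len(np.unique(quantization_hist(ramp, severity=3))))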
diff --git a/degradation_toolkit/x_distortion/saturate.py b/degradation_toolkit/x_distortion/saturate.py
new file mode 100644
index 0000000000000000000000000000000000000000..7943f2007464a907d7da061f6cc18b0d5a731859
--- /dev/null
+++ b/degradation_toolkit/x_distortion/saturate.py
@@ -0,0 +1,75 @@
+import cv2
+import numpy as np
+
+
+def saturate_weaken_HSV(img, severity=1):
+ """
+    Saturation weakening by scaling the S channel in HSV.
+    severity=[1, 2, 3, 4, 5] corresponds to scale=[0.7, 0.55, 0.4, 0.2, 0.0].
+    Severity levels mainly follow KADID-10K.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [0.7, 0.55, 0.4, 0.2, 0.0][severity - 1]
+ hsv = np.array(cv2.cvtColor(img, cv2.COLOR_RGB2HSV), dtype=np.float32)
+ hsv[:, :, 1] = c * hsv[:, :, 1]
+ hsv = np.uint8(np.clip(hsv, 0, 255))
+ img = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
+ return img
+
+
+def saturate_weaken_YCrCb(img, severity=1):
+ """
+    Saturation weakening by scaling the Cr and Cb channels in YCrCb.
+    severity=[1, 2, 3, 4, 5] corresponds to scale=[0.6, 0.4, 0.2, 0.1, 0.0].
+    Severity levels mainly follow PieAPP.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [0.6, 0.4, 0.2, 0.1, 0.0][severity - 1]
+ ycrcb = np.array(cv2.cvtColor(img, cv2.COLOR_RGB2YCR_CB), dtype=np.float32)
+ ycrcb[:, :, 1] = 128 + (ycrcb[:, :, 1] - 128) * c
+ ycrcb[:, :, 2] = 128 + (ycrcb[:, :, 2] - 128) * c
+ ycrcb = np.uint8(np.clip(ycrcb, 0, 255))
+ img = cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2RGB)
+ return img
+
+
+def saturate_strengthen_HSV(img, severity=1):
+ """
+    Saturation strengthening by scaling the S channel in HSV.
+    severity=[1, 2, 3, 4, 5] corresponds to scale=[3.0, 6.0, 12.0, 20.0, 64.0].
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [3.0, 6.0, 12.0, 20.0, 64.0][severity - 1]
+ hsv = np.array(cv2.cvtColor(img, cv2.COLOR_RGB2HSV), dtype=np.float32)
+ hsv[:, :, 1] = c * hsv[:, :, 1]
+ hsv = np.uint8(np.clip(hsv, 0, 255))
+ img = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
+ return img
+
+
+def saturate_strengthen_YCrCb(img, severity=1):
+ """
+    Saturation strengthening by scaling the Cr and Cb channels in YCrCb.
+    severity=[1, 2, 3, 4, 5] corresponds to scale=[2.0, 3.0, 5.0, 8.0, 16.0].
+    Severity levels mainly follow PieAPP.
+
+ @param img: Input image, H x W x 3, value range [0, 255]
+ @param severity: Severity of distortion, [1, 5]
+ @return: Degraded image, H x W x 3, value range [0, 255]
+ """
+ c = [2.0, 3.0, 5.0, 8.0, 16.0][severity - 1]
+ ycrcb = np.array(cv2.cvtColor(img, cv2.COLOR_RGB2YCR_CB), dtype=np.float32)
+ ycrcb[:, :, 1] = 128 + (ycrcb[:, :, 1] - 128) * c
+ ycrcb[:, :, 2] = 128 + (ycrcb[:, :, 2] - 128) * c
+ ycrcb = np.uint8(np.clip(ycrcb, 0, 255))
+ img = cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2RGB)
+ return img
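+
+
+# Scale intuition (illustrative): YCrCb chroma channels are centered at 128, so
+# neutral grays (Cr = Cb = 128) are left unchanged while colored pixels have
+# their chroma offsets multiplied by c before clipping to [0, 255].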
diff --git a/degradation_toolkit/x_distortion/spatter.py b/degradation_toolkit/x_distortion/spatter.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f78472a674cb69bdcea970ba678cf083a2e93c0
--- /dev/null
+++ b/degradation_toolkit/x_distortion/spatter.py
@@ -0,0 +1,74 @@
+import cv2
+import numpy as np
+from skimage.filters import gaussian
+
+def rgb2gray(rgb):
+ return np.dot(rgb[..., :3], [0.2989, 0.5870, 0.1140])
+
+def spatter(x, severity=1):
+ c = [(0.65, 0.3, 4, 0.69, 0.6, 0),
+ (0.65, 0.3, 3, 0.68, 0.6, 0),
+ (0.65, 0.3, 2, 0.68, 0.5, 0),
+ (0.65, 0.3, 1, 0.65, 1.5, 1),
+ (0.67, 0.4, 1, 0.65, 1.5, 1)][severity - 1]
+ x_PIL = x
+ x = np.array(x, dtype=np.float32) / 255.
+
+ liquid_layer = np.random.normal(size=x.shape[:2], loc=c[0], scale=c[1])
+
+ liquid_layer = gaussian(liquid_layer, sigma=c[2])
+ liquid_layer[liquid_layer < c[3]] = 0
+ if c[5] == 0:
+ liquid_layer = (liquid_layer * 255).astype(np.uint8)
+ dist = 255 - cv2.Canny(liquid_layer, 50, 150)
+ dist = cv2.distanceTransform(dist, cv2.DIST_L2, 5)
+ _, dist = cv2.threshold(dist, 20, 20, cv2.THRESH_TRUNC)
+ dist = cv2.blur(dist, (3, 3)).astype(np.uint8)
+ dist = cv2.equalizeHist(dist)
+ ker = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]])
+ dist = cv2.filter2D(dist, cv2.CV_8U, ker)
+ dist = cv2.blur(dist, (3, 3)).astype(np.float32)
+
+ m = cv2.cvtColor(liquid_layer * dist, cv2.COLOR_GRAY2BGRA)
+ m /= np.max(m, axis=(0, 1))
+ m *= c[4]
+    # water is pale turquoise
+ color = np.concatenate((175 / 255. * np.ones_like(m[..., :1]),
+ 238 / 255. * np.ones_like(m[..., :1]),
+ 238 / 255. * np.ones_like(m[..., :1])), axis=2)
+
+ color = cv2.cvtColor(color, cv2.COLOR_BGR2BGRA)
+
+ if len(x.shape) < 3 or x.shape[2] < 3:
+ add_spatter_color = cv2.cvtColor(np.clip(m * color, 0, 1),
+ cv2.COLOR_BGRA2BGR)
+ add_spatter_gray = rgb2gray(add_spatter_color)
+
+ return np.clip(x + add_spatter_gray, 0, 1) * 255
+
+ else:
+
+ x = cv2.cvtColor(x, cv2.COLOR_BGR2BGRA)
+
+ return cv2.cvtColor(np.clip(x + m * color, 0, 1),
+ cv2.COLOR_BGRA2BGR) * 255
+ else:
+ m = np.where(liquid_layer > c[3], 1, 0)
+ m = gaussian(m.astype(np.float32), sigma=c[4])
+ m[m < 0.8] = 0
+
+ x_rgb = np.array(x_PIL.convert('RGB'))
+
+ # mud brown
+ color = np.concatenate((63 / 255. * np.ones_like(x_rgb[..., :1]),
+ 42 / 255. * np.ones_like(x_rgb[..., :1]),
+ 20 / 255. * np.ones_like(x_rgb[..., :1])),
+ axis=2)
+ color *= m[..., np.newaxis]
+ if len(x.shape) < 3 or x.shape[2] < 3:
+ x *= (1 - m)
+ return np.clip(x + rgb2gray(color), 0, 1) * 255
+
+ else:
+ x *= (1 - m[..., np.newaxis])
+ return np.clip(x + color, 0, 1) * 255
\ No newline at end of file
diff --git a/degradation_utils.py b/degradation_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3443823b6e85b668518f76d85ca877ee4470577
--- /dev/null
+++ b/degradation_utils.py
@@ -0,0 +1,232 @@
+import numpy as np
+import cv2
+import random
+from PIL import Image
+
+from degradation_toolkit.add_degradation_various import *
+from degradation_toolkit.image_operators import *
+from degradation_toolkit.x_distortion import *
+
+
+degradation_list1 = [
+ 'blur',
+ 'noise',
+ 'compression',
+ 'brighten',
+ 'darken',
+ 'spatter',
+ 'contrast_strengthen',
+ 'contrast_weaken',
+ 'saturate_strengthen',
+ 'saturate_weaken',
+ 'oversharpen',
+ 'pixelate',
+ 'quantization',
+]
+
+
+degradation_list2 = [
+ 'Rain',
+ 'Ringing',
+ 'r_l',
+ 'Inpainting',
+ 'mosaic',
+ 'SRx2',
+ 'SRx4',
+ 'GaussianNoise',
+ 'GaussianBlur',
+ 'JPEG',
+ 'Resize',
+ 'SPNoise',
+ 'LowLight',
+ 'PoissonNoise',
+ 'gray',
+ 'ColorDistortion',
+]
+
+
+degradation_list3 = [
+ 'Laplacian',
+ 'Canny',
+ 'Sobel',
+ 'Defocus',
+ 'Mosaic',
+ 'Barrel',
+ 'Pincushion',
+ 'Spatter',
+ 'Elastic',
+ 'Frost',
+ 'Contrast',
+]
+
+
+degradation_list4 = [
+ 'flip',
+ 'rotate90',
+ 'rotate180',
+ 'rotate270',
+ 'identity',
+]
+
+
+all_degradation_types = degradation_list1 + degradation_list2 + degradation_list3 + degradation_list4
+
+
+def single2uint(img):
+ return np.uint8((img.clip(0, 1) * 255.0).round())
+
+
+def uint2single(img):
+ return np.float32(img / 255.0)
+
+
+def add_x_distortion_single_images(img_gt1, deg_type):
+ # np.uint8, BGR
+ x_distortion_dict = distortions_dict
+ severity = random.choice([1, 2, 3, 4, 5])
+ if deg_type == 'compression' or deg_type == "quantization":
+ severity = min(3, severity)
+ deg_type = random.choice(x_distortion_dict[deg_type])
+
+ img_gt1 = cv2.cvtColor(img_gt1, cv2.COLOR_BGR2RGB)
+ img_lq1 = globals()[deg_type](img_gt1, severity)
+
+ img_gt1 = cv2.cvtColor(img_gt1, cv2.COLOR_RGB2BGR)
+ img_lq1 = cv2.cvtColor(img_lq1, cv2.COLOR_RGB2BGR)
+
+ return img_lq1, img_gt1, deg_type
+
+
+def add_degradation_single_images(img_gt1, deg_type):
+ if deg_type == 'Rain':
+ value = random.uniform(40, 200)
+ img_lq1 = add_rain(img_gt1, value=value)
+ elif deg_type == 'Ringing':
+ img_lq1 = add_ringing(img_gt1)
+ elif deg_type == 'r_l':
+ img_lq1 = r_l(img_gt1)
+ elif deg_type == 'Inpainting':
+ l_num = random.randint(20, 50)
+ l_thick = random.randint(10, 20)
+ img_lq1 = inpainting(img_gt1, l_num=l_num, l_thick=l_thick)
+ elif deg_type == 'mosaic':
+ img_lq1 = mosaic_CFA_Bayer(img_gt1)
+ elif deg_type == 'SRx2':
+ H, W, _ = img_gt1.shape
+ img_lq1 = cv2.resize(img_gt1, (W//2, H//2), interpolation=cv2.INTER_CUBIC)
+ img_lq1 = cv2.resize(img_lq1, (W, H), interpolation=cv2.INTER_CUBIC)
+ elif deg_type == 'SRx4':
+ H, W, _ = img_gt1.shape
+ img_lq1 = cv2.resize(img_gt1, (W//4, H//4), interpolation=cv2.INTER_CUBIC)
+ img_lq1 = cv2.resize(img_lq1, (W, H), interpolation=cv2.INTER_CUBIC)
+
+ elif deg_type == 'GaussianNoise':
+ level = random.uniform(10, 50)
+ img_lq1 = add_Gaussian_noise(img_gt1, level=level)
+ elif deg_type == 'GaussianBlur':
+ sigma = random.uniform(2, 4)
+ img_lq1 = iso_GaussianBlur(img_gt1, window=15, sigma=sigma)
+ elif deg_type == 'JPEG':
+ level = random.randint(10, 40)
+ img_lq1 = add_JPEG_noise(img_gt1, level=level)
+ elif deg_type == 'Resize':
+ img_lq1 = add_resize(img_gt1)
+ elif deg_type == 'SPNoise':
+ img_lq1 = add_sp_noise(img_gt1)
+ elif deg_type == 'LowLight':
+ lum_scale = random.uniform(0.3, 0.4)
+ img_lq1 = low_light(img_gt1, lum_scale=lum_scale)
+ elif deg_type == 'PoissonNoise':
+ img_lq1 = add_Poisson_noise(img_gt1, level=2)
+ elif deg_type == 'gray':
+ img_lq1 = cv2.cvtColor(img_gt1, cv2.COLOR_BGR2GRAY)
+ img_lq1 = np.expand_dims(img_lq1, axis=2)
+ img_lq1 = np.concatenate((img_lq1, img_lq1, img_lq1), axis=2)
+ elif deg_type == 'None':
+ img_lq1 = img_gt1
+ elif deg_type == 'ColorDistortion':
+ if random.random() < 0.5:
+ channels = list(range(3))
+ random.shuffle(channels)
+ img_lq1 = img_gt1[..., channels]
+ else:
+ channel = random.randint(0, 2)
+ img_lq1 = img_gt1.copy()
+ if random.random() < 0.5:
+ img_lq1[..., channel] = 0
+ else:
+ img_lq1[..., channel] = 1
+    else:
+        raise ValueError(f"Unknown degradation type: '{deg_type}'")
+ img_lq1 = np.clip(img_lq1 * 255, 0, 255).round().astype(np.uint8)
+ img_lq1 = img_lq1.astype(np.float32) / 255.0
+ img_gt1 = np.clip(img_gt1 * 255, 0, 255).round().astype(np.uint8)
+ img_gt1 = img_gt1.astype(np.float32) / 255.0
+
+ return img_lq1, img_gt1
+
+
+def calculate_operators_single_images(img_gt1, deg_type):
+ img_gt1 = img_gt1.copy()
+
+ if deg_type == 'Laplacian':
+ img_lq1 = Laplacian_edge_detector(img_gt1)
+ elif deg_type == 'Canny':
+ img_lq1 = Canny_edge_detector(img_gt1)
+ elif deg_type == 'Sobel':
+ img_lq1 = Sobel_edge_detector(img_gt1)
+ elif deg_type == 'Defocus':
+ img_lq1 = defocus_blur(img_gt1, level=(3, 0.2))
+ elif deg_type == 'Mosaic':
+ img_lq1 = mosaic_CFA_Bayer(img_gt1)
+ elif deg_type == 'Barrel':
+ img_lq1 = simulate_barrel_distortion(img_gt1, k1=0.1, k2=0.05)
+ elif deg_type == 'Pincushion':
+ img_lq1 = simulate_pincushion_distortion(img_gt1, k1=-0.1, k2=-0.05)
+ elif deg_type == 'Spatter':
+ img_lq1 = uint2single(spatter((img_gt1), severity=1))
+ elif deg_type == 'Elastic':
+ img_lq1 = elastic_transform((img_gt1), severity=4)
+ elif deg_type == 'Frost':
+ img_lq1 = uint2single(frost(img_gt1, severity=4))
+ elif deg_type == 'Contrast':
+ img_lq1 = adjust_contrast(img_gt1, clip_limit=4.0, tile_grid_size=(4, 4))
+
+ if np.mean(img_lq1).astype(np.float16) == 0:
+        print(deg_type, 'produced an all-zero image; falling back to the ground truth.')
+ img_lq1 = img_gt1.copy()
+
+ return img_lq1, img_gt1
+
+
+def add_degradation(image, deg_type):
+ if deg_type in degradation_list1:
+ list_idx = 1
+ img_lq1, _, _ = add_x_distortion_single_images(np.copy(image), deg_type)
+ img_lq1 = uint2single(img_lq1)
+ elif deg_type in degradation_list2:
+ list_idx = 2
+ img_lq1, _ = add_degradation_single_images(np.copy(uint2single(image)), deg_type)
+ elif deg_type in degradation_list3:
+ list_idx = 3
+ if deg_type in ['Laplacian', 'Canny', 'Sobel', 'Frost']:
+ img_lq1, _ = calculate_operators_single_images(np.copy(image), deg_type)
+ else:
+ img_lq1, _ = calculate_operators_single_images(np.copy(uint2single(image)), deg_type)
+ if img_lq1.max() > 1:
+ img_lq1 = uint2single(img_lq1)
+ elif deg_type in degradation_list4:
+ list_idx = 4
+ img_lq1 = np.copy(uint2single(image))
+ if deg_type == 'flip':
+ img_lq1 = np.flip(img_lq1, axis=1)
+ elif deg_type == 'rotate90':
+ img_lq1 = np.rot90(img_lq1, k=1)
+ elif deg_type == 'rotate180':
+ img_lq1 = np.rot90(img_lq1, k=2)
+ elif deg_type == 'rotate270':
+ img_lq1 = np.rot90(img_lq1, k=3)
+ elif deg_type == 'identity':
+ pass
+ return Image.fromarray(single2uint(img_lq1)), list_idx
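+
+
+if __name__ == "__main__":
+    # Minimal smoke test (illustrative only; assumes a uint8 RGB array).
+    dummy = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
+    degraded, list_idx = add_degradation(dummy, 'rotate90')
+    print(degraded.size, list_idx)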
diff --git a/demo_tasks/__init__.py b/demo_tasks/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7236bf0992c5c11d8d8679719e3bffb9d0595023
--- /dev/null
+++ b/demo_tasks/__init__.py
@@ -0,0 +1,13 @@
+from .gradio_tasks import dense_prediction_text, conditional_generation_text, process_dense_prediction_tasks, process_conditional_generation_tasks
+from .gradio_tasks_restoration import image_restoration_text, process_image_restoration_tasks
+from .gradio_tasks_style import style_transfer_text, style_condition_fusion_text, process_style_transfer_tasks, process_style_condition_fusion_tasks
+from .gradio_tasks_tryon import tryon_text, process_tryon_tasks
+from .gradio_tasks_editing import editing_text, process_editing_tasks
+from .gradio_tasks_photodoodle import photodoodle_text, process_photodoodle_tasks
+from .gradio_tasks_editing_subject import editing_with_subject_text, process_editing_with_subject_tasks
+from .gradio_tasks_relighting import relighting_text, process_relighting_tasks
+from .gradio_tasks_unseen import unseen_tasks_text, process_unseen_tasks
+from .gradio_tasks_subject import subject_driven_text, condition_subject_fusion_text, condition_subject_style_fusion_text, style_transfer_with_subject_text, \
+ image_restoration_with_subject_text, \
+ process_subject_driven_tasks, process_image_restoration_with_subject_tasks, process_style_transfer_with_subject_tasks, process_condition_subject_style_fusion_tasks, \
+ process_condition_subject_fusion_tasks
\ No newline at end of file
diff --git a/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9.jpg b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0dd17034dd5d1d649ea79cd78f608deb58724b29
--- /dev/null
+++ b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0c4bde1a615edc572ce74f5e334c7cd035f686bf94d00fa13de1c825a837010
+size 227638
diff --git a/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_ben2-background-removal.jpg b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..72aa89095606ebbe2ed48cae3d4dee654c1bb4c5
--- /dev/null
+++ b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59ecf637fce4eb98d75a9f17f005887b62fcf4f25db258a0e3f39f93a0704014
+size 47062
diff --git a/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_canny_100_200_512.jpg b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6df7f05d2e15698eff3a404bb650aa3628aad2de
--- /dev/null
+++ b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a78563977bc2236d93bff7a2e1459efcb737eabe5e044c70ce627840796106e9
+size 183790
diff --git a/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_depth-anything-v2_Large.jpg b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..370a23536bfe51a1fc5b1bc7e31efefe87ad4990
--- /dev/null
+++ b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:944b88ce7c555db6653fab381d475d7af2d5552dd732c9fe3b81c28a9aa6b425
+size 80719
diff --git a/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_dsine_normal_map.jpg b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..acfd3c0b3717208df4321db141000673e6c3a319
--- /dev/null
+++ b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:107b1cfe736e79078760fbbeffa050e82584e4265eb632948107926a2e0b1ed3
+size 142596
diff --git a/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_hed_512.jpg b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4bb8e5e7cf871177a41df077b3e944254a581c53
--- /dev/null
+++ b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7db142dd0065491bc4c8367cd1b5c7fbaaa754eafed5ab80a9980d43c6c5a326
+size 164556
diff --git a/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_sam2_mask.jpg b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c316375669aea2db2306017971e7dc26b70d4f1f
--- /dev/null
+++ b/demo_tasks/examples/012cd3921e1f97d761eeff580f918ff9/012cd3921e1f97d761eeff580f918ff9_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2439f48bf3a32e2e380c6c73016d63660a6a36cc8fbeeb3737c8b92c9ca3a07
+size 228612
diff --git a/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3.jpg b/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a0ca26d2effaaac040357f340ed334b6e3882c70
--- /dev/null
+++ b/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59350c813475dce227e740d946c5b7664c4390c6d26f3261ec0202a2480c5b74
+size 242999
diff --git a/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_instantx-style_0.jpg b/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0823e4038cff94e17cdadb4cdad6e924fe8e0d56
--- /dev/null
+++ b/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19c642b67133efe536ee9a7cf9016e7f269db2d23c2bea9a077e83460cd7d629
+size 248980
diff --git a/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_instantx-style_0_style.jpg b/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a4569eb8a7cac27f7f729553e2fd205105216ccb
--- /dev/null
+++ b/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0796121fe4d6a689278cf5af4f0428793b38c3ce9d5c39072a45a3c6a0dfe21e
+size 342364
diff --git a/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_qwen2_5_mask.jpg b/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_qwen2_5_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..22e42d3d278efea2f564797217e860502a5a47d3
--- /dev/null
+++ b/demo_tasks/examples/0fdaecdb7906a1bf0d6e202363f15de3/0fdaecdb7906a1bf0d6e202363f15de3_qwen2_5_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85dc6eb30251a4c7bb45d6a8caea4f9d93f40543dfb4ad6d50d647ab49a15bee
+size 65937
diff --git a/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462.jpg b/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..94564b9558a0d02c39d30a5982959d704117b63d
--- /dev/null
+++ b/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:156bcd9d1123e6732829ccca11eda8761191f219de538e5b1746c9d283c47627
+size 216488
diff --git a/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_instantx-style_0.jpg b/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e206deb9bf6cd248bab0677b2d67cd7c69f59ca9
--- /dev/null
+++ b/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c9beac8a2d2713b06311394315c006192a1ea35f17ee80e8fc4d6f6a93464b4
+size 381724
diff --git a/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_instantx-style_0_style.jpg b/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9e2007d6721129c6ef0bd13f7b76b9f68b7e9050
--- /dev/null
+++ b/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f91bd76cd88742f32d48f659d7e1890e5f6c7986f542b4a4b5041d9aa4cd6e23
+size 457896
diff --git a/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_qwen2_5_mask.jpg b/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_qwen2_5_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bd61ee1a03a9374ffcfe1a9a329a6b36fec97c83
--- /dev/null
+++ b/demo_tasks/examples/10d7dcae5240b8cc8c9427e876b4f462/10d7dcae5240b8cc8c9427e876b4f462_qwen2_5_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:538a5a535aad2befabcdb23914d8e4bf2dd00aedc0ec7f460742ce3a2d3ebb44
+size 52964
diff --git a/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3.jpg b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0115146fba654c27fe2b9122659bcd042962aafa
--- /dev/null
+++ b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e92c8ccfb16feac9a6ebb408c459cacb1480984b1f2069939834e1e22f342f82
+size 223838
diff --git a/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_canny_100_200_512.jpg b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0280cd77e6416837735ebcd4bcdc53382ba219fd
--- /dev/null
+++ b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:680ad5e599de63a1cdcf11fb52c81688c26ee1a49664940388b6526a01bce4ba
+size 227372
diff --git a/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_depth-anything-v2_Large.jpg b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..73d76ac4be7d7935f7901de42aff49f1bf127400
--- /dev/null
+++ b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a85966b4f60297e0b4c4cf50d1a5052589f4a56b12683aefaa508101fb0f1b75
+size 72591
diff --git a/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_dsine_normal_map.jpg b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..21521b3ed4209be8128c1f31d52b6856f8bb4d8b
--- /dev/null
+++ b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bcd73f41f18c76353aedea03130566b5fa7e63ac11e429eaa2a1e6469f8192c
+size 229654
diff --git a/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_hed_512.jpg b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..aabf5206a988174de4ffe54ae0097dec0b2a2e6f
--- /dev/null
+++ b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac807b0fa0c1dfd41fc8b4b0a20d5cd74eb414f53020d6471dabb269b6f67303
+size 169728
diff --git a/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_openpose_fullres_nohand.jpg b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_openpose_fullres_nohand.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eb9a1c685a3cfcd5e1425240133a919570d4d7a2
--- /dev/null
+++ b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_openpose_fullres_nohand.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0dd5fa3b2ee1b2d71603a266f3b45b2f9e3bfe2be5ee9279d3ba98d0dc6b8231
+size 127109
diff --git a/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_sam2_mask.jpg b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fac83d2f29859a7a476a746f7a2ad6561801fede
--- /dev/null
+++ b/demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2729c28b7c2aad173ec15eb1c6426578aabbe3048f8fff4e7c138ed33c89934
+size 209035
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b23171c2ddc4bbef89d3d14676bf8b65781a10d
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42873e75c6fdb61555e708ac840b976d3eef4b75459529f122540eae7108112f
+size 248522
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_ben2-background-removal.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..85e4706a64eaf775c71df4376602f7729bcd25ef
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b87c1ac807916d1487bb862f15066d209a7500ef24077e11e922d7056f5e8f0b
+size 35078
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_canny_100_200_512.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0759c4fb3454398b8efae1aa3121db2890109f37
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:366435caadac2827ecca13babdf330af9325d17e315afa120237b3c74b7c72e0
+size 245504
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_depth-anything-v2_Large.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..03c8872fac84537822ed79a8209b2774c459909d
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce6a51be1be204fc34c6d7414eee8b90ddfd34799fe864c1ae5de2f0e2e378ad
+size 109603
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_dsine_normal_map.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4fbdeaa7ba301b9b7eab4b76e9bed471b217402d
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e00b81024e0095b1fd11e765baaf54f2aeb7d60dcee231a21e51ccf6a4e712fa
+size 121473
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_hed_512.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..917934b90340e7d9d3446769ac8ce96a1bb2abba
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05608dc817cae352ba734afe41950313b916ba3806d0b80b5b66a86ab9315ec8
+size 184158
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_instantx-style_0.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e51b81e86290c5010e92b0f5e153e7db3a5aa557
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:436202fa1183d1fbca5cd509b6103519d0813140ea52a3aa679517e564edfde0
+size 317914
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_instantx-style_0_style.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5a466fe6f7a9376fcd1e28201bd75e4f582ae825
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff1c102fb5f471398e8d53b7b9e737ab0c436c70b7c246495d8c49e5d8f2a6ea
+size 373789
diff --git a/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_sam2_mask.jpg b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a3e6919b3e7c25c785af2dffac6e481dacd8ae58
--- /dev/null
+++ b/demo_tasks/examples/2c4e256fa512cb7e7f433f4c7f9101de/2c4e256fa512cb7e7f433f4c7f9101de_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cc574c8c405b96421ded317616a67191c18a2a90531cc1dc1d4561a7579e622
+size 302800
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..816ffe9a9f3d2a7212ac846e3cdb9499950bff1b
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e42a74f10495af6f0716b18b1089be5c46d7ad6f3a0cd6cf73c6387521b632f
+size 263697
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_ben2-background-removal.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9d1d4c9ba460ee6c46e533284e870ba45e62047e
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:076b846470741ad620f883a66f4d3e2d7681aa110e2add517ab9744932bfe33a
+size 32031
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_canny_100_200_512.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..00b194378bf601a2c9cecea7e6129cb982d53043
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac8a3c1e379e6130f1eaebbfeda9a2640800b0dfa439b247c9cad941f91212e1
+size 262489
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_depth-anything-v2_Large.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ba5f5dd0e99769fa9d74a9b6d5b95e1e39bf9d01
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c239c35085d26208ce27874d22bb98d33067be1ba8ffc154ba77d8111de90361
+size 83448
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_dsine_normal_map.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b02d288856268864938cec12a001dff3425edb11
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b78e869218a4cc86a4a86402546e56d332abc700f8a9f11569ee845fcba047b2
+size 75378
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_hed_512.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e9c6144a4407147b5bdad8064c8d2c1a39458ee3
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d45e4d1049902e0338a2e3f655829eacbdcdebe06ed74c9cf36f6959648ef7b8
+size 173472
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_instantx-style_0.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..08ecb5a6a72f2f859a5bd05fb1c87a72e1627f18
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f720e8a35a3e88fdf4bb397101273a1fa1b04e3dc0c01847968f73de7f182f7
+size 403122
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_instantx-style_0_style.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a09b37e91e21be8ddd769e514198a28ba9471aeb
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed11c2cdd9ff5f80502c77f3f569c9a76b51dd6f09709ff944021ac9efdb645b
+size 315105
diff --git a/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_sam2_mask.jpg b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9daf63186d4d269d3a515ed0d272793c739efa8e
--- /dev/null
+++ b/demo_tasks/examples/53b3f413257bee9e499b823b44623b1a/53b3f413257bee9e499b823b44623b1a_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:261928e1dc45c5f6a4caefcd4f6c454c31cb4a5e3664ffb7530643b16445319c
+size 239491
diff --git a/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e.jpg b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..acb59dd1a23a6b7edff23b6aba21c6399c1f36d9
--- /dev/null
+++ b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a38c8399d75a03a4698fbba6f679bfd254e0b395e4b29c5fa910e456360f9a1e
+size 209913
diff --git a/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_ben2-background-removal.jpg b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8766a5648ebc32c565fc7bcb85692b8a706407f0
--- /dev/null
+++ b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2346b5c8cc01cf5ab4292b86cea5a446688ac12139c84edafd0183e02787ea2e
+size 32599
diff --git a/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_canny_100_200_512.jpg b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b83ce2dcad0657597f3e37368c2bf952316fea58
--- /dev/null
+++ b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b91a98bdd10227ef531fb8a923be12d5e7221e8cb8acfbf0adf01148b6af9d0
+size 177410
diff --git a/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_depth-anything-v2_Large.jpg b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2ade7df1e5d9190a07cad35160bfa366d26a916d
--- /dev/null
+++ b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aeafc34894e7e50bf7cae926acfee4a43c7156f857132f359d58b50ceadba267
+size 67737
diff --git a/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_dsine_normal_map.jpg b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9b2b880c424822b970c60ba6b4b5d4332704b9cc
--- /dev/null
+++ b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a21efb8f5f8fe2dde3977d0dd07c2aef1d442097274bebb4f280a5772df0366
+size 87735
diff --git a/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_hed_512.jpg b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..390df27a0cc0fccf19234edb36a9688457b8aed2
--- /dev/null
+++ b/demo_tasks/examples/5bf755ed9dbb9b3e223e7ba35232b06e/5bf755ed9dbb9b3e223e7ba35232b06e_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2e3016bda8b30e2c5bd134eca39a0d66c810f51e5ff578721a55088f342d486
+size 142922
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1c65f200b78660cd5d9e71d9139b39c15503923f
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6b740124d507fb4ff452c4d6c73120a104851a5e30e9380cf8fc34fdad799b7
+size 308600
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_ben2-background-removal.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e9f3db1cf3bbb95784c40af2f755869a4b181daf
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bdae801bacabd7f889af7d7d3659cdbff563edd315df4cbcc4124f72e8db597
+size 81800
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_canny_100_200_512.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..65eb192bcff972ae70d8d8d333c6e6a160a647a3
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ccd3b793e21c462687fd97d172e59682c69de85d21e31517a610939297ffeae
+size 295100
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_depth-anything-v2_Large.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ff6940c1ef88ca4f7bc69e427f34b98a625239cf
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4ecc8e0a5e8b59b276517b33b25d6a5a700e5a1a26f39f2533625235d85d6f9
+size 98671
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_dsine_normal_map.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5be5837c6aeef0b260c4cdb07aae52a9621ed210
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b9bc882b8388273ac5ba81d612f09a72765b4de1065831b9c0089910518c5ac
+size 192377
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_edge.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_edge.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cf5b28a411311ecd7105e6bbb1a64b23f5fd4d96
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_edge.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af53852dfbefc94a46fa364ef1320a43aebda86729f53b17ccd8796025a43322
+size 7850
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_hed_512.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..727d6067861df273dbfb6fb355304c8e1a63cd8f
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7394212b80816d1b2c54f10354dfa2407f7ffdfdfb500a98168f7f302713869
+size 195830
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_instantx-style_0.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0345483effa6dad261cac526a9addd7c3d2387dc
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7b31a38a63be4f27acd71942d107b85ac99ee140097a96f1744f3fa31f85551
+size 320901
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_instantx-style_0_style.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6dff8dc0eac49c7dc37efe5ccc033b7b99181124
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abfeb146d90c7bd977a51c8dab40a63025cf49f4931416f18c91d53c3cf51f9f
+size 701226
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_openpose_fullres_nohand.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_openpose_fullres_nohand.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ceff1b0c1cc76cf5f529e037e9bc7f818008abbe
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_openpose_fullres_nohand.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51d3f86764b65ccbb49e81e79be22e09d3369eee050b9fc0a499bd2d79c35ee1
+size 45379
diff --git a/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_sam2_mask.jpg b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5872d8c1449a16b61d1714d40526181c271e302d
--- /dev/null
+++ b/demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:907e0b18d1ace3d4031641f123260054bb5846554730566261787b11050c11db
+size 265876
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4bc48a41f7f8541de35fea68d66d7dbc1fc9f5d4
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92ae5cd151a2f482a52ec1731a48d0b48bbc75b205184ce55d8b34ed7b739e1a
+size 272751
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_ben2-background-removal.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..95301496a97972a6154b302b3a4e1917814888fa
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1af5c9f233653f460ed97e536e2ab3b173dee6ebfe28058c4050f413dfbbb616
+size 46404
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_canny_100_200_512.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e284c2c870028f379e610ae39b175abaad9e9636
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3131cc80e0058a7ef5dd4238fc6883069f4099f94db46aa39b48a2045a486fd
+size 275151
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_depth-anything-v2_Large.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8ca7425a74219f14c6329eb86d59a8fff323c3a0
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e210df7b4c7a0eb599c80ed0d22b546f8c966878e8821880a85390bd7eed64a
+size 83798
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_dsine_normal_map.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cfe0a05c51ddd87d14013de0ffc2c36b3e48472f
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:736bb78480b5b0e8b936647018988be5bfc131ee6e1e545f02e68ee0fadeef6d
+size 225016
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_hed_512.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9bb80a4ac4900030ecdddfd0b54624095bb7bb28
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d737e3aec8b6580a29d85c2cc3e5629946ee43455767fdd7236cd9af623a6b20
+size 190624
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_instantx-style_0.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e26ea3c1add6c7995a8757c7f230b2d1aec55dbc
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:372d13551b6bbacc482a6e0a3ab5da7afe76648ff5a51743f8d3a7a3af83637a
+size 324699
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_instantx-style_0_style.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..66f82eef599f74b0bcf1b6b597e0a5b0a74f19fb
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:473119ca742e0a9e1a8c6c26d777ea8221518b6676803987578c23ee73021a79
+size 145360
diff --git a/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_sam2_mask.jpg b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a08f2c9062ce9d5ada542daac5877f51062bd9fb
--- /dev/null
+++ b/demo_tasks/examples/79f2ee632f1be3ad64210a641c4e201b/79f2ee632f1be3ad64210a641c4e201b_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16613544849ee04d774505f473967f8952ecacfa59e9a46980b7b637db5760a6
+size 274289
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3231aae4b37213772bd3d077d97f1a12ab9be555
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f6b57fb3d598db0bd725e4ba148e6e0ed08723e527fcfb27ff9d374e8cd859d
+size 165021
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_ben2-background-removal.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..34721fa882ee601a23d11d1d624ac81fcae8e2ed
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c53012fe2b8d0374e18bb9c849a9aa9d4d4579b5c9508ad4f4382001847518e0
+size 39155
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_canny_100_200_512.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1c90245d261f121b10d2bc462a3a649a78c17c9f
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5477f21c3ab5f23716dcfd57ef30911bc1fdd429ebda3072e84b7a910e05592c
+size 99929
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_depth-anything-v2_Large.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cb908c92ff47589e29de6af3e5f30efa887fa731
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0f2cdd4efa4a948422add8aa69b1f54e67b2c7e5e3aaea54aeaf903678d3604
+size 62100
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_dsine_normal_map.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f68c348ea8411a9781c80f271f6ea062eebfa596
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b777efd4b430df8cbda366030b5b67558cb875653227cd633aad7a6a779e818
+size 85061
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_hed_512.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7f52b4d384db3efd864c3f1cff590129d89eaa95
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:541999375fc5f70dcc55155f12b59672852bb4f9da104dc884de6834bf13418d
+size 105498
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_instantx-style_0.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..08b48bc00e9a6a5aff85fa656a81e837a9c4094e
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d5519893973ec67612e5a0867562ebb37c15f7c25b3d20fdd2382d7d6787b22
+size 293789
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_instantx-style_0_style.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5e537d8cb5e24f643180c67f03436693fb1f121c
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fcd503a9da5bb7d2fe68fcd2d733be5c93de67d278455a8716c6a7c19a8caba
+size 68688
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_openpose_fullres_nohand.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_openpose_fullres_nohand.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8d34ba8877eb6269217dda0998a71414e46b2664
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_openpose_fullres_nohand.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f984f536c030c515cedde83deee9ebf478c90be606aec1bfe0f548df6575f1
+size 41020
diff --git a/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_sam2_mask.jpg b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..83bda146b00d4b5e6c29068fc86a01216535c3e1
--- /dev/null
+++ b/demo_tasks/examples/88d0ba30e2c0bc4401cf2633cac162d4/88d0ba30e2c0bc4401cf2633cac162d4_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93548ce0f3e80cbfdb24c05f43ee63159a922eefb7d8ef7ea698bf6dc71ad890
+size 136879
diff --git a/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2.jpg b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..adf7e976a0b278a9abe2cfabd2de02f089d48932
--- /dev/null
+++ b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dd4947e63c05271bd4bca9d9259d3a9adb6f3705799b31589b65872a9cbd63e
+size 254902
diff --git a/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_ben2-background-removal.jpg b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2d7fb11e61515076d77039cdf6216022da3a07c5
--- /dev/null
+++ b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab395421a97a9c84fb9120e24c43e2c5aede02fa17c43c1e33eb3517471afcb2
+size 49523
diff --git a/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_canny_100_200_512.jpg b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..398a17026c4feee06d421341115fe5c26b238ebc
--- /dev/null
+++ b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f12a310fcdf43e449b5d6712d89cf11cc70aa36a3f293aaa73e37e445ce45146
+size 248401
diff --git a/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_depth-anything-v2_Large.jpg b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d9ef96667a345ef4235305f6058f1537e98c50f7
--- /dev/null
+++ b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f09453bbf5c2ab1095f718a284c1773394f3b6714bc684cf81f8ae2e5e061c7
+size 80664
diff --git a/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_dsine_normal_map.jpg b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..77532544445ee3fd7def1c5be86d71eabaf4cecd
--- /dev/null
+++ b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc33bb5cc1642894cdedbcf3849d415407b033c2299ce6148217a584cee7d210
+size 221129
diff --git a/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_hed_512.jpg b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7e30ca993f476a15e5ac79728f959fc9c23fba9e
--- /dev/null
+++ b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c80af49bd7c5bc3c6a0331ebd99c364315d14bce342389108179107b700ee3b6
+size 201980
diff --git a/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_openpose_fullres_nohand.jpg b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_openpose_fullres_nohand.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..533df14058c3c4218f4e90628cd0c0e623b59ec3
--- /dev/null
+++ b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_openpose_fullres_nohand.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52f6fcbdf86500194eb47ab9363ddd28d74dfaed941952027bcaed94930d04c2
+size 44728
diff --git a/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_sam2_mask.jpg b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3c9e08dcd470df4e023d16dc62605505eef0cd6f
--- /dev/null
+++ b/demo_tasks/examples/93bc1c43af2d6c91ac2fc966bf7725a2/93bc1c43af2d6c91ac2fc966bf7725a2_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01a898e2e670579dc9e07d08cb435011dcaea15042a706ace1859bde1433e9b9
+size 299906
diff --git a/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a.jpg b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0db427c7d1fb49177999a9ba9c7911a07dc32df9
--- /dev/null
+++ b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82982ee7b0876bfc086d293f590cb6515b674f1561b935c83df51959d670f9cb
+size 189166
diff --git a/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_ben2-background-removal.jpg b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e8766e81274be2c54f46f6fe2f1a9aeb9c48c184
--- /dev/null
+++ b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:466c862b6b15c01e9b047722e5b1a28e52140a90de5ae20b5abd900f3a455f8c
+size 34031
diff --git a/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_canny_100_200_512.jpg b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6e134c80b3a84ef029a69a7a68f0061a88193d20
--- /dev/null
+++ b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6808a53974b22e973b6f2cfb54d1863ff9e92c1119bd6d2c48442986fa4d9967
+size 166906
diff --git a/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_depth-anything-v2_Large.jpg b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3bc23d620e5197e251f82fcc7167d6f3bc335ca7
--- /dev/null
+++ b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5037b5ca4f29c91c5c2eab7e630adcff2baeaed89b06443b8ba6d1f0333fe086
+size 52743
diff --git a/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_dsine_normal_map.jpg b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6f67bca45a184a7aa5be2a9821b896a20b3ef49b
--- /dev/null
+++ b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:260ac4db7c59a61b5455379fcf7d0ecdbd5ab5d54e753d90349e848de63d3a9d
+size 77361
diff --git a/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_hed_512.jpg b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..400141db2cf95f45f1e1224bfc5696e102381882
--- /dev/null
+++ b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c363781e15ffe55a5f3cdc7174f84bb3f127d65cb81cc55c2974b6765e6f59ff
+size 85662
diff --git a/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_sam2_mask.jpg b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6646ff3cd90ddafd606283c5e1bd769715866d1e
--- /dev/null
+++ b/demo_tasks/examples/9c565b1aad76b22f5bb836744a93561a/9c565b1aad76b22f5bb836744a93561a_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b193a23dfe8736be3c86b9fe3f8465974b88b6e34161843cc58b8746df4b488b
+size 166401
diff --git a/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710.jpg b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7c83fb5b3e2d7fdc71178c809f334a6c5ad89ebe
--- /dev/null
+++ b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6a03d1b9361ee349a0ce712a683bf9ae5bc7b3f58a76fea17a112a6717629fa
+size 547958
diff --git a/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_ben2-background-removal.jpg b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..aab2be204f2cc65ce0fcefdb1183118295e4aacd
--- /dev/null
+++ b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add9a1d54e241659451ece48c55c9e1055784e84be06a237a7ede3a5798352cd
+size 218494
diff --git a/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_canny_100_200_512.jpg b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7ab4076d1f8ba30025b3534b80414026d2f7e4bc
--- /dev/null
+++ b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91efee1da06d1703a0d2ff8a2b23001e853398c142828dda51b4bc5fd58ab923
+size 466736
diff --git a/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_depth-anything-v2_Large.jpg b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a86db645d0bd17f0651dd527e0f42648c201d694
--- /dev/null
+++ b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:184f4f16a3d129872782c36f8ab0cdfe68700050ee73b3aa808a8a25264a7f05
+size 96751
diff --git a/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_dsine_normal_map.jpg b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..38d9b513a918673c8504409aeef9a9edf45803ce
--- /dev/null
+++ b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69d5df967de28ab4805bbb40ddc6a7d3af6ba5bec86793fcc2a646c2986da6c3
+size 196664
diff --git a/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_hed_512.jpg b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7420662902c73d43c8029307bb1da3c8f67b8244
--- /dev/null
+++ b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9698f695346cf4ac277153e6d0a8ebbf8b0567b09adf50b482bfaaa2cec6b5b3
+size 216125
diff --git a/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_sam2_mask.jpg b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0022d5ce2ba5a6275b86fae1d198d704a2d1ed9e
--- /dev/null
+++ b/demo_tasks/examples/9d39f75f1f728e097efeaff39acb4710/9d39f75f1f728e097efeaff39acb4710_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17e141b2fdda33dcf366d1e4df63f1e27d7e0043d9275bcd9c2be2ecb7852642
+size 231095
diff --git a/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90.jpg b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ff84bd387545f28480fa947e0679434065a1e442
--- /dev/null
+++ b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86bf656841c0882c813b7c76530590d1d4b10485b1bdd79bec193496591788f9
+size 231979
diff --git a/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_ben2-background-removal.jpg b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_ben2-background-removal.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d8a9a319a8ebed5205d97daed53e9b4f2297d33e
--- /dev/null
+++ b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_ben2-background-removal.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e4e95054f96a817d193e0f00c85fa09a1f5564ed0c13aa500bfb433d8773da3
+size 86949
diff --git a/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_canny_100_200_512.jpg b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d1abd8364a6b60495bb0bfeff7090f6db4ba2e38
--- /dev/null
+++ b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef8284516b71868b1990d88c313bd4743d1cfa490cafaeaf8c525e4f3a0c67c9
+size 180458
diff --git a/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_depth-anything-v2_Large.jpg b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6de8e1b049a953374846abcfaaeb41f8be40c3f9
--- /dev/null
+++ b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:096ad210cb1898f4d8b5d949ad0698b5e6f3808e20efbc59c76eb37cdb4162f5
+size 74827
diff --git a/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_dsine_normal_map.jpg b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_dsine_normal_map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..97196fa10fb8a9e90a25e11619cae5ae199f664b
--- /dev/null
+++ b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_dsine_normal_map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0092f2d99c56e83b9442d1d589d2624cc4a3b7d2994fa713263d4a10571e5633
+size 157782
diff --git a/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_hed_512.jpg b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a2014c65786fdd77ecadb4577c2a4f755d6bf996
--- /dev/null
+++ b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9b75a04a2f2233b1fa7779088d589da008ed9a3bf940dcfab8456b559bc76f7
+size 156499
diff --git a/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_sam2_mask.jpg b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7e448004212cdc26fdbc38f6db776ea6a4ef6a6c
--- /dev/null
+++ b/demo_tasks/examples/de5a8b250bf407aa7e04913562dcba90/de5a8b250bf407aa7e04913562dcba90_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac01f3bdbd8ee0f951f069077e310e24bac3d119cf5c698306d0c0cbfccd9cb1
+size 258388
diff --git a/demo_tasks/examples/env/1_source.jpg b/demo_tasks/examples/env/1_source.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..294fa7e430fbf8665490f4ffeb9661d6acbd3d95
--- /dev/null
+++ b/demo_tasks/examples/env/1_source.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02902e10de12106ab4c94445e15c0a66166de4359e0b664a832971730de50c9a
+size 175651
diff --git a/demo_tasks/examples/env/1_target.jpg b/demo_tasks/examples/env/1_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..485360c25497049b0f3877d72fc7fecbc79f3b86
--- /dev/null
+++ b/demo_tasks/examples/env/1_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0ff39133ef63671c7b1429031e978275e1170f5ecaba8c556e9b0f1eca66c1f
+size 135680
diff --git a/demo_tasks/examples/env/2_source.jpg b/demo_tasks/examples/env/2_source.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..35c1762c228511852a13b64243428d1026d67333
--- /dev/null
+++ b/demo_tasks/examples/env/2_source.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67f4b04e8913353a68f4252b31f2c1d063b5c829bb6c9e647e6743871e9f9c2a
+size 71884
diff --git a/demo_tasks/examples/env/2_target.jpg b/demo_tasks/examples/env/2_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ef4c3e157f8d38957142770221b82b7b49d6ec84
--- /dev/null
+++ b/demo_tasks/examples/env/2_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1d831d1b2ea43d49635e3fa1907a28cb7ee105b62fab6c48fefc250e83e0429
+size 69771
diff --git a/demo_tasks/examples/env/3_source.jpg b/demo_tasks/examples/env/3_source.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dcd6338ea77ad5faece68834768ad238fa91a391
--- /dev/null
+++ b/demo_tasks/examples/env/3_source.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad70a98bf80b7fb47574fdc1dc587910ba1a844905e8d41099f62ba2c7048eb1
+size 100764
diff --git a/demo_tasks/examples/env/3_target.jpg b/demo_tasks/examples/env/3_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..600daa669e5fe03305ceecfa0f659a4823abfc4c
--- /dev/null
+++ b/demo_tasks/examples/env/3_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52dd67a81387726c331aa2369b9cee2149169ea5818d2b4efe7ccd987d22ad01
+size 91556
diff --git a/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_1.jpg b/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..67f2216ee2980728ccd4def5408a3c52ebde6929
--- /dev/null
+++ b/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_1.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c62192905568842cca8673307216704a0bd3b15dec83689a489d527dd048edbb
+size 108222
diff --git a/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_6.jpg b/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a13dc525fc4a00a622c0cf2ad91ec4420f08b98a
--- /dev/null
+++ b/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_6.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c0582e2c9a5123b2c19b7fafa03dd3451f76eb7fd8727fea692ee88652a3e67
+size 126583
diff --git a/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_8.jpg b/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bd5042c6dcfdd9473cb0cb2819a012007912b1de
--- /dev/null
+++ b/demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_8.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bef4b34e1ab6ada3a068a96119db7f9018b4a432b48a63b9184b66ed4fc2186
+size 116108
diff --git a/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_0.jpg b/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ebafe19c73dc75f9b70edae491c78c5f19cf596d
--- /dev/null
+++ b/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed633130911d34da42e1daa3ad8d8bcae480a924085217fb5414ea5f74374880
+size 119970
diff --git a/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_1.jpg b/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b805b69641a963f3303a1f832e780191c77b1d7e
--- /dev/null
+++ b/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_1.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f2717a2d5839f61aea0e8205ce87cd09bff0f80889feadd6a2f92273cecc90f
+size 116791
diff --git a/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_2.jpg b/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b36d15b77505fa3979c90521cef07d81a57f7ea1
--- /dev/null
+++ b/demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_2.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c06881da347ee30527baac9024c553501209463f2775c72218a67c3ce0235d04
+size 126311
diff --git a/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_0.jpg b/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ca8b87bc9ca4d928603672d7b5302069faae2592
--- /dev/null
+++ b/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b573b3fb863ec8bf95409ef18416215148a9a3967c6bcdbe7ebae3b80b76605
+size 127442
diff --git a/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_1.jpg b/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1fe84e5cd7162aab1f95f6a3541fd66e477cc33b
--- /dev/null
+++ b/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_1.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1716fac314e1fa5b53a490b485fb21adef8ea987dca41c0ae09b977f8180988a
+size 115079
diff --git a/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_2.jpg b/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4ae968256b1eb59c7838a32a42894e1d9276b29d
--- /dev/null
+++ b/demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_2.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3145b139a80d1c2bf5d5e22c3d25ddb01b08a45f2e5321d23ec312a7e9b1e97e
+size 142203
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_canny_100_200_512.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..63068b39b84bf9177aabaeee4c59cb8be4bc4c70
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b15cfc5c5a95d4cf79dcb224313d5154bdd8a270ec07ed59c24de7426d182ad4
+size 48224
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_depth-anything-v2_Large.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b42c249099c95eda685246062cc810e758c33e32
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:078800702d6965c296d80f4034774746faf1cc5c0f6ce079032fa5a9090a59a8
+size 19603
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_dsine-normal-map.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_dsine-normal-map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8c4189e129e8d45e401021c217cd13d08f15ef84
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_dsine-normal-map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5dccd08e019f1509297f548f4735c4520e837502f61cb9a1e2e4784d1d713d8
+size 42117
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_hed_512.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..657fff65cde0027ed1d77ce32d3c33ea2c738f98
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:647e0a4d505ccb717cf11f20f2fc709077a8d91abb6290881ee0ba7e7944c7aa
+size 49384
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_instantx-style_0.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f7cf99cddb08668d018727a263edd6936b156131
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:383e5d910b0b81c0223a8e235dceb1a58f3c17527219ed22e71e6fd8506e1f5a
+size 78742
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_instantx-style_0_style.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a5de6e66cfc031bef896514f42ff17f711c1aa87
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da994e461ec6d17bb36212c4235ed69478ee7e23c5bbee00a8d1cf1a8943c346
+size 73211
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_reference.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_reference.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..358b5fe7cf46b0f2438ca0d3bd1a464ee69d4164
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_reference.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:817f3f56ff630a93fa540fdf34eb1c8138ef92d78118cd1fe0213f17c0a4a7a9
+size 23999
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_sam2_mask.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2d9ec1457c25abf833beecf69a6c0a441cca565b
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2d3addc2da54e71a9220848a4b1576a53c434a911ad4845bb43215fe880c270
+size 65826
diff --git a/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_target.jpg b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c50875cae5b716888a639283deef38d2111853ed
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00004-of-00022-7170/data-00004-of-00022-7170_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01f49ea39a67ed6e70b5e6eb8433665359230e9376e201660b546fd6087c2caf
+size 41013
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_canny_100_200_512.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ac6fd314678d848424395fd7ea4439e94ec9e1eb
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7048164e57a1205b4532c31d84662d84a0e6eb6a12f0ab84437e3753e697b1a2
+size 73610
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_depth-anything-v2_Large.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ad9e9aac3a3bc2166cd56da2459d5691923b4c74
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07bab247edccb82519a0235ca942de79c2d3ecfcd4a93c7ec4da47fc324eaaf2
+size 38154
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_dsine-normal-map.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_dsine-normal-map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1bb59bb65b9d71741c148ae5ce11cbde06373662
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_dsine-normal-map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f3888f32630f23f62c9c322fd81c4c8996969e9090e8a2494f14b025494d9e4
+size 49710
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_hed_512.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..59f9caa65bf3fe9dec1c534a3c2f84658a5d21cc
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:318d6d5474d20eb82ad4dc00a85705da7195babd4f03a90e91836320a626e9c9
+size 68948
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_instantx-style_0.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..03d2d1caa08dc59d56df19901273d518f5b928d8
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5de1cd109c39dad234d3ddbee98ea062e8f402f2731bee2e1048173879c7c77
+size 73756
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_instantx-style_0_style.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b4482d8c97f99b7b435925fd99fba2786e1d8087
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86d45afb07aabfcc2e745699371313741774fdc314fc095594a7c0c55969f323
+size 34397
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_reference.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_reference.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4ee94f9593c6a3ca34bb46f6b15c058535e40d8c
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_reference.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ddf19e591931fe462dfcb6543f6f73db11c2204acd782a586469850c1ff4d7e
+size 32303
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_sam2_mask.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9d5a3e45a575ba96df9dca952d0c9990a06492a1
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:465e496a2bf7ce323677a008db02f88feaddfd8777b62993912b0276e54209f3
+size 97874
diff --git a/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_target.jpg b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4c7529405b4852f9e1725a5775312a9931c9783e
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00005-of-00022-4396/data-00005-of-00022-4396_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c2deaef15acc4f1957734daa846d71132c8b3744d72c22c41a238ca39c381e2
+size 55262
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_canny_100_200_512.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9f3451b0f3f827c72fad69b47678beac33a9a71a
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b59f226141f7a4a9664768aa590c410d77949a167a3711f90ade163118e48d93
+size 69863
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_depth-anything-v2_Large.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..59feb1ea9fa5664ca1c9ed0479d1ba71aeb5f51b
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88653f98c9a9cbe0a398980cea303a72be47956a51a0e752613779fc7d13ede0
+size 26149
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_dsine-normal-map.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_dsine-normal-map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b1603f14e6160bed2a9de9a645d3594a7910a6b4
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_dsine-normal-map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dc9f12e3ba03286ba09e3b9c3a8e0f52ed79fa02451395d7b9c807c8833e469
+size 53699
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_hed_512.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4d20921e659afd55a8e05c850a7f29855b066a07
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebac25d3290de7716b88d417edd26b693aee1af4825908d63c93f3438849b52d
+size 59714
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_instantx-style_0.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bcba03d100d9510ab716ce1d25cfd944641721f5
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e013015484b7cd6d159b1668aa7bc7ccb9cdfc4e5e69c988e572ec98630dbdd
+size 119813
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_instantx-style_0_style.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..78fda34a4209bbe3a646004013605348dda41ae8
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eab6412526d8cee26c26afc79d942c7c248012ac00f6a07ece4d68de8d27d02e
+size 221058
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_reference.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_reference.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bbdfac672931b97becb110bd822fa4ea7d624e93
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_reference.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54a562e1b6c203192edd1f0ca9cf45dd7dabbcc2a86891a2cd7574f9be28169a
+size 37950
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_sam2_mask.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2d2ff2615effa109a9d9b840e66d618f85fa3f91
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fbd3e660f852a79e0106df0c5989f7fe7332f34a612af417f01e01cb4cb08ea
+size 90743
diff --git a/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_target.jpg b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2b4026065f16bdc5ce93c1625b9d6646ca29f909
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00013-of-00022-4696/data-00013-of-00022-4696_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66c8ad61faaa50402594cdaa7669f43ada359d20cd573f10100dd27af5604e3b
+size 62085
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_canny_100_200_512.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1365bc103fe239443c1f944b8f1640779b7bf728
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6c635ff97c6053e5183b35253327802aa1ba28fd89be36f7e6b35eca9eb6e85
+size 99971
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_depth-anything-v2_Large.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..582959dbcfa04a1ee97ad9087822660933f034f2
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17b52d33c0dceb2d5febb4ab79df791688214cef88b0637ab6c8a6e9fb541efe
+size 29961
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_dsine-normal-map.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_dsine-normal-map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..af5c9460bfe179739c4568290b41f1f065762961
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_dsine-normal-map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:052cf694ad5d11e1a3913632812a4e51dfa0667792e3a218bfbd0479ef4d0ec8
+size 65669
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_hed_512.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6564728df633239b6ea536ff3cb160f656ffc765
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f321bda65f278ef578e726d6a9402a0abaf644c0b689da00f11c44aa8fd17ea8
+size 75710
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_instantx-style_0.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7e18f6e1097df931e4c7bdb7a449f4b243418549
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bfc9571ca02ce09af124fda0e651224f9c8023e23f167a7b52800cbcb7eee9d
+size 103019
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_instantx-style_0_style.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b95d909ba16ef99bf5294ca4c6d982af92046fc9
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99a0dd362afe69f12799bd319053bcb2dd41c916e1ba7034d0bfda5dfcdfd43f
+size 213002
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_reference.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_reference.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..58920256beefcd9411f100d2fcc46afed7a64b59
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_reference.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53ac22c893ab8700720b71f40ee39b45058a896b4bb408ebd5c1e141f8dd71c2
+size 29143
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_sam2_mask.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8f92e909319c611d98a92dbf56e54ca69aae42a9
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06e665ce4f90e02b1b198f06427e15025dc0b471947fb8102c0761728eb19b9a
+size 117217
diff --git a/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_target.jpg b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e7d19fab9e9eb7201544e2c84cd43bb4ab521474
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00017-of-00022-8377/data-00017-of-00022-8377_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a858b85929171aa94400c5be23b56c3dc2f66b1c6c3969ac949f9f3edd72a2f
+size 71908
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_canny_100_200_512.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_canny_100_200_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0bd8b94c88f5a8c6d86e60b8c916e2c8319754ac
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_canny_100_200_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c3f2d8ea9e2d0b47ec7714f3608cf277d239ab6413cabd0a7cb5ba566ccb1d7
+size 86138
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_depth-anything-v2_Large.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_depth-anything-v2_Large.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1098c4a984ebb9f88c360f1420af7df092997eb1
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_depth-anything-v2_Large.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aff70beb3d4faaaf700a4b8f0c764a86c842dfad4d40c91c5fadfd1d90a46f6d
+size 24413
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_dsine-normal-map.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_dsine-normal-map.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c0d2503b432de2725182afd83ecbece3c52a44bb
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_dsine-normal-map.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f2037c206c3d1b2a53188a608958c34f19f4422d2b8f629db4d17ef7ac8d18c
+size 47579
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_hed_512.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_hed_512.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..717b819645a708bc1cf3427c2a2bd5b8d9dbd3c9
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_hed_512.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b72b11736dd0e832eb0b9e8cedfb94e24295ad9a32e6cd0d63068310a734ded0
+size 56659
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_instantx-style_0.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_instantx-style_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..01cf618a23f785566c9fc9a8fd007be6c8a826b3
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_instantx-style_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e2e88b75cf761bcf487976e24aea95b2b59ca79c5d8d20a2bcd9d613254c164
+size 83988
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_instantx-style_0_style.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_instantx-style_0_style.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e5a76301ad9665970c7c62ba6ad1beaeb971a78
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_instantx-style_0_style.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9494efc8a6aa334fa731463ddefb2bdb6d1d103d54525d68c7a21246d41b8556
+size 70863
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_reference.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_reference.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2129f9c944e29e291065a3940541901583e63784
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_reference.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e824555881f36b46c8cd373cda1d1ce4135f4e3fcd8d72f42fdb16b700f4e536
+size 38089
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_sam2_mask.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_sam2_mask.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e54d8b3f5967ef01ec13b92b2761285dc03be1c2
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_sam2_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a29230d3b0e4a1e2615e7ee72432c96aa365e307ffd59739c1cf68c20d249880
+size 79201
diff --git a/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_target.jpg b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..443450f846a3ff816ba4f0a0aac708a784f1dcd4
--- /dev/null
+++ b/demo_tasks/examples/graph200k/data-00018-of-00022-4948/data-00018-of-00022-4948_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cc3c18241ed59416dfed2dc83dbc3311b9d07557217ce0731b1a3ef1ba8786d
+size 61210
diff --git a/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_qwen_subject_replacement_1737373818845_1.jpg b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_qwen_subject_replacement_1737373818845_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..08a73782d3523533ea01ea997ed2cfce654f4a0e
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_qwen_subject_replacement_1737373818845_1.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55f93db6a070fa4db9805f9abbfabc57a0fb0bd9069983c4b3a85cedffb8f4c1
+size 65264
diff --git a/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_reference.jpg b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_reference.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..13b7e171f27d15aa241f8e03404f55954f07ce49
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_reference.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68cc44529aa152afc1311e0c24eaf0f8c02745483cb5d9e7eff01609c11cce7d
+size 73085
diff --git a/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_target.jpg b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..02ac3878a8454fb5373180d12d7f572bcf602149
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1051dac5af24c8883d131f4afb1a0f2975ac937169b545c40b870904c8d4684
+size 63189
diff --git a/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_qwen_subject_replacement_1737377830929_2.jpg b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_qwen_subject_replacement_1737377830929_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..76a381418326d31f15af763deda507a1ea5eb2e4
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_qwen_subject_replacement_1737377830929_2.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75b64038070ad6287ca5f2ec5501169704c63e03f5d4e3375c31d8ae52b728cf
+size 62514
diff --git a/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_reference.jpg b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_reference.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dd734f22505581bd72ee0ddf523b16a824d73b61
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_reference.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c134319362d99cdad2ef9d97835c90e0f9042d1e22e42e77e2cb120fc906ef67
+size 28696
diff --git a/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_target.jpg b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..361a10bc5b7eb55ab74da1190b503bdd08cbf34e
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e532d82ffc539448c3fbbae34a26aa4cc933e5b234470af616cb043abf157ac1
+size 52131
diff --git a/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_qwen_subject_replacement_1737410088010_2.jpg b/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_qwen_subject_replacement_1737410088010_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f82e82e2c6dbc503f5c6beb7545dbe5db763d6aa
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_qwen_subject_replacement_1737410088010_2.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46da8158843419a0e27ea0a61da4719c6c5b7d6ea187f0b972421c227385a9c2
+size 54681
diff --git a/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_reference.jpg b/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_reference.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a0cc8f098abbed9d8a6d3333baa76d3b9d35d95f
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_reference.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19cbebc3a36357c4156b7d7f361397a7683a93a635ee6cff36333a34984ff067
+size 20918
diff --git a/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_target.jpg b/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4e13e19ebe08ae529af8364f55d57772acdfadc5
--- /dev/null
+++ b/demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a223c8733a55fb2db980a2dd10bfd2b2daeadd2aef3191c9fed9171e73a74a0f
+size 48502
diff --git a/demo_tasks/examples/omniedit/task_obj_add_273266.jpg b/demo_tasks/examples/omniedit/task_obj_add_273266.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b3f8a14dde1b501828cd0edb061572deda25003
--- /dev/null
+++ b/demo_tasks/examples/omniedit/task_obj_add_273266.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c8718cc05e41d87d3e7259eeabccf6f52208d747f7504a37a9f623ec689f74d
+size 105227
diff --git a/demo_tasks/examples/omniedit/task_obj_add_273266_edit.jpg b/demo_tasks/examples/omniedit/task_obj_add_273266_edit.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d7b46f8d880464b63dea3bb45a0b7238e98aa5b6
--- /dev/null
+++ b/demo_tasks/examples/omniedit/task_obj_add_273266_edit.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68203e7a1e51c7851af0d4ac421921c0e2b2e2d9870b835287ad9ec3bffbb682
+size 50874
diff --git a/demo_tasks/examples/omniedit/task_obj_add_528329.jpg b/demo_tasks/examples/omniedit/task_obj_add_528329.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eefe568cccaba3f8dfb1920aaf0268f30d5897a8
--- /dev/null
+++ b/demo_tasks/examples/omniedit/task_obj_add_528329.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95c23040a6f694c8b92f738f4599d9edd8c8dc2051461d943b836f9c6dbec8fa
+size 219421
diff --git a/demo_tasks/examples/omniedit/task_obj_add_528329_edit.jpg b/demo_tasks/examples/omniedit/task_obj_add_528329_edit.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..49f77194587f9a5161baaf5d5f08c7bd8db888b5
--- /dev/null
+++ b/demo_tasks/examples/omniedit/task_obj_add_528329_edit.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e18cf9b1e005bc3adcdb1ff67d848187167600ded420c76029539667a01ea482
+size 193101
diff --git a/demo_tasks/examples/omniedit/task_obj_remove_855511.jpg b/demo_tasks/examples/omniedit/task_obj_remove_855511.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8c2721179cc8518d2cde5d1d403815314764beda
--- /dev/null
+++ b/demo_tasks/examples/omniedit/task_obj_remove_855511.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c8c89d7f26797787dc2229ce8dc9e29b1a67be1ff6d61ca458ede5e46f5bbc5
+size 442233
diff --git a/demo_tasks/examples/omniedit/task_obj_remove_855511_edit.jpg b/demo_tasks/examples/omniedit/task_obj_remove_855511_edit.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..495afbc9bb21601989bceeea2d46ff38ccbd8723
--- /dev/null
+++ b/demo_tasks/examples/omniedit/task_obj_remove_855511_edit.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fbada1b8d499a90ff173c3085a3b7aaf7adcfd6c8100241d20ff401f0a56167
+size 3335021
diff --git a/demo_tasks/examples/photodoodle/sksedgeeffect/1.jpg b/demo_tasks/examples/photodoodle/sksedgeeffect/1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..528653bfdfcb2cf3a03a5cbe45c6582ac61f1267
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksedgeeffect/1.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35e6b82c42e4e291cd5e1ec7736db9a1be65d58e18bf0385773eec4466b79f74
+size 590702
diff --git a/demo_tasks/examples/photodoodle/sksedgeeffect/1_blend.jpg b/demo_tasks/examples/photodoodle/sksedgeeffect/1_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..31c4538a1d5cdc5c22ff6b44e5cd97e2df29bad4
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksedgeeffect/1_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a459f0442af5cc3f751589f102fa8ccdfb862cde2ed8827f80925f7e30eeb111
+size 539281
diff --git a/demo_tasks/examples/photodoodle/sksedgeeffect/34.jpg b/demo_tasks/examples/photodoodle/sksedgeeffect/34.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5837f11bd106dca81e2bd55f87a583c8db96e48e
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksedgeeffect/34.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80a58974a589a0b13a8a45d574c92ee73e2fee4d1905f8de5cf86e8336dd257d
+size 539297
diff --git a/demo_tasks/examples/photodoodle/sksedgeeffect/34_blend.jpg b/demo_tasks/examples/photodoodle/sksedgeeffect/34_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d4730a828cc68f060056cc0c6384c2e403914cbf
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksedgeeffect/34_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a841e4b18ef60a093d9c1ca05a29e0b517df0946c6b7bfcd52de7e5c4f005009
+size 452425
diff --git a/demo_tasks/examples/photodoodle/sksmagiceffects/24.jpg b/demo_tasks/examples/photodoodle/sksmagiceffects/24.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a1823cf94b369a12402531a24ef002f2ef0bb42a
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmagiceffects/24.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa087a69301b2937168d71ab27b1a5e6b88e9b9ea3b9fbfac0a85dc39f6e994d
+size 265810
diff --git a/demo_tasks/examples/photodoodle/sksmagiceffects/24_blend.jpg b/demo_tasks/examples/photodoodle/sksmagiceffects/24_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7773d63817d3da3be77cb4c70d553deaaa026618
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmagiceffects/24_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc5f3ab92686c9183dc47b6d26596547721a2bc4424f2add277f7d5d0bcb281
+size 250239
diff --git a/demo_tasks/examples/photodoodle/sksmagiceffects/29.jpg b/demo_tasks/examples/photodoodle/sksmagiceffects/29.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b8348ad27a0b35183c6a5d8e402291b126a3bb94
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmagiceffects/29.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6982c6c50fdd907522002564e6ae5db7843cadcc460e5bb5ca9e6216d1804a3a
+size 692652
diff --git a/demo_tasks/examples/photodoodle/sksmagiceffects/29_blend.jpg b/demo_tasks/examples/photodoodle/sksmagiceffects/29_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a8756f106c8885cac29fba72b3702507f3c54e9e
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmagiceffects/29_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75dd398d16771a2b5a444419f21b174833bd5366b684c662b02dcf6d6016905a
+size 619915
diff --git a/demo_tasks/examples/photodoodle/sksmagiceffects/50.jpg b/demo_tasks/examples/photodoodle/sksmagiceffects/50.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..aaa5b6f78ed5ccb955f1e32f43dc889c2664f953
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmagiceffects/50.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cef13dd2ef0d5d356a93f202c2db014eac5e40766aadb336b0fe566a878c35e5
+size 457836
diff --git a/demo_tasks/examples/photodoodle/sksmagiceffects/50_blend.jpg b/demo_tasks/examples/photodoodle/sksmagiceffects/50_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c8f350db9558e448132c68d8618a21444ee8d0fc
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmagiceffects/50_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:477f2041f467ee5ad0b452179ac85b0f70e6990b1a41a671c9bffac7004e3506
+size 380867
diff --git a/demo_tasks/examples/photodoodle/sksmonstercalledlulu/5.jpg b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..783b7ece63b78efb0dd4a20355460a4c93109e59
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/5.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37fcf4d03f3fd13bff741e6d1d96d7b14651bf58b7f9b85be5864f00e980c72f
+size 366749
diff --git a/demo_tasks/examples/photodoodle/sksmonstercalledlulu/5_blend.jpg b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/5_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3c524b9c448dfba8f3ec4104c5b34f2c19ffa1d3
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/5_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:335a028da3fb974e36c6d88d97adee166f6a59b151eb1aa02f0bb01e0ebaf393
+size 314651
diff --git a/demo_tasks/examples/photodoodle/sksmonstercalledlulu/6.jpg b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9ce450e63f2173f83d292f2bd9915395a28d11dc
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/6.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:953f31f333b2d4ea6a69400463f0e355db4833bd72cbcdec23cd8ddb588ea7c5
+size 519609
diff --git a/demo_tasks/examples/photodoodle/sksmonstercalledlulu/6_blend.jpg b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/6_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f0de2c12d412f93aca2046661f31fe4d39ddf8e6
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/6_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37ee0e0419d99e8c07bb60ae3fd1e5fd9c1e9ce6f398f972d529449aea5c6c16
+size 392709
diff --git a/demo_tasks/examples/photodoodle/sksmonstercalledlulu/9.jpg b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/9.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..579ba659211bafe652be2bb7f28c129bc8157a40
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/9.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f59d4bb5202f77d417fbb524351f8059ac9620497b16a519018e54ac3091a25
+size 452743
diff --git a/demo_tasks/examples/photodoodle/sksmonstercalledlulu/9_blend.jpg b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/9_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1232d346e7819b52578792c7e2bd7f191648e193
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/sksmonstercalledlulu/9_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a74674712898e5a52c5e6ebcce21fe000bda0b81b6df4cbc3feb745cd2070df
+size 388506
diff --git a/demo_tasks/examples/photodoodle/skspaintingeffects/12.jpg b/demo_tasks/examples/photodoodle/skspaintingeffects/12.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f56ac1cae3c688a847c03b7842246bd0e130194e
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/skspaintingeffects/12.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf5cfa63012f31cdde62ad3d16ed53d198e91e37bcb98d1b623f649273bf6efc
+size 293168
diff --git a/demo_tasks/examples/photodoodle/skspaintingeffects/12_blend.jpg b/demo_tasks/examples/photodoodle/skspaintingeffects/12_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3ad8561f66b859483af6eeab46f36342d8478068
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/skspaintingeffects/12_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0a5d2f671d6326f08eb98773024357b394e8292b15e5e7b1971fd6795e4c270
+size 344162
diff --git a/demo_tasks/examples/photodoodle/skspaintingeffects/35.jpg b/demo_tasks/examples/photodoodle/skspaintingeffects/35.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8a9f6e092f51bb16f51174752df8636692178fdd
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/skspaintingeffects/35.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:056affdc0bc6cd0285174de834d54b65cc1f6acd7da1929404fb8cf3a84fc8ce
+size 517209
diff --git a/demo_tasks/examples/photodoodle/skspaintingeffects/35_blend.jpg b/demo_tasks/examples/photodoodle/skspaintingeffects/35_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b2c2de6e71f6bf63d74ac61c7a4a893117b465f8
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/skspaintingeffects/35_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5f0d8fa12262d5d506f39617bd91f89bbc8701e6ebdb21f4226ae73af8f4c24
+size 542275
diff --git a/demo_tasks/examples/photodoodle/skspaintingeffects/37.jpg b/demo_tasks/examples/photodoodle/skspaintingeffects/37.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e9bcec43128343498eb7c45235665654140b5e07
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/skspaintingeffects/37.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0326db197eef166ebea079ecba2f37e975aef56f5a6ac86ab36fd17fb05459c8
+size 706479
diff --git a/demo_tasks/examples/photodoodle/skspaintingeffects/37_blend.jpg b/demo_tasks/examples/photodoodle/skspaintingeffects/37_blend.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dd0b304e30913cb83184ab1223b6e9096fab585a
--- /dev/null
+++ b/demo_tasks/examples/photodoodle/skspaintingeffects/37_blend.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb168cc4ad8adb5683a83ac62c31fbb76f2630a2f106b76c214e1179c6361728
+size 560841
diff --git a/demo_tasks/examples/property/1_source.jpg b/demo_tasks/examples/property/1_source.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7ee555aae20110dd0a422443ce00aa843540115f
--- /dev/null
+++ b/demo_tasks/examples/property/1_source.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59def6501821741c906eda6b868471718663e81f211153232c6ba95f8e9c3c61
+size 106384
diff --git a/demo_tasks/examples/property/1_target.jpg b/demo_tasks/examples/property/1_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eaba86a5a618419bb9fd7a7c24da20188bd90586
--- /dev/null
+++ b/demo_tasks/examples/property/1_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21385f08d796e1f556c9fd2e0e0fe88d9b0026a76a8b14bf2ed99519c2c8c28d
+size 101324
diff --git a/demo_tasks/examples/property/2_source.jpg b/demo_tasks/examples/property/2_source.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..45083915e60efade4a4bfe16134917a1b6cbd8f8
--- /dev/null
+++ b/demo_tasks/examples/property/2_source.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3085adfff77a268c57ddf836c19f51863fdb383d4c37281a24df619cc5a8892b
+size 67866
diff --git a/demo_tasks/examples/property/2_target.jpg b/demo_tasks/examples/property/2_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0b58a9ac7bde92786bc559f8e8bbe1ea2b32f9d9
--- /dev/null
+++ b/demo_tasks/examples/property/2_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c25a9c747fa1638d966896df5c722f70b0c1ab4d61a770c0f78bba75c0082b0
+size 68949
diff --git a/demo_tasks/examples/property/3_source.jpg b/demo_tasks/examples/property/3_source.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e5d05d773b0af9632342388a570c4e078800043f
--- /dev/null
+++ b/demo_tasks/examples/property/3_source.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65e4cac8268efa93b6324ae16f5923a167748bbb4741a0678eb8b8185228a030
+size 54999
diff --git a/demo_tasks/examples/property/3_target.jpg b/demo_tasks/examples/property/3_target.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fdd9e6b1eeee1396ee33e10108fde15b1804d7be
--- /dev/null
+++ b/demo_tasks/examples/property/3_target.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36a6f3d4b9bc6be9dbfc5d4fdb8e294469378113c997f81fbdfc538f82c2814d
+size 56517
diff --git a/demo_tasks/examples/relighting/02aee2a8df8f6e6f16ca4ec278203543656cecf1.jpg b/demo_tasks/examples/relighting/02aee2a8df8f6e6f16ca4ec278203543656cecf1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..531efea30a6e7010485862475f57934477504dbd
--- /dev/null
+++ b/demo_tasks/examples/relighting/02aee2a8df8f6e6f16ca4ec278203543656cecf1.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c77e3517f1305168af1420db6bd445b06cf5b07ea2d142fe2e2c8e974c464ea4
+size 409496
diff --git a/demo_tasks/examples/relighting/02aee2a8df8f6e6f16ca4ec278203543656cecf1_Left.jpg b/demo_tasks/examples/relighting/02aee2a8df8f6e6f16ca4ec278203543656cecf1_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..454153c7c278b8ecf9f7897d68ccbfa9b5e44446
--- /dev/null
+++ b/demo_tasks/examples/relighting/02aee2a8df8f6e6f16ca4ec278203543656cecf1_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef468297fd1ebb24474fb31831d82071c5da8021bd07d04736ed851f4f6a0b2d
+size 374003
diff --git a/demo_tasks/examples/relighting/02af06c41208b31248e94da13166a675c862b003.jpg b/demo_tasks/examples/relighting/02af06c41208b31248e94da13166a675c862b003.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..808723dd4f569f596161842abb1fabfb44c6abde
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af06c41208b31248e94da13166a675c862b003.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:859df26337c27175680f18aea78b14194f80b9b3909dd3dfcb621911f8bcd9c9
+size 634673
diff --git a/demo_tasks/examples/relighting/02af06c41208b31248e94da13166a675c862b003_Left.jpg b/demo_tasks/examples/relighting/02af06c41208b31248e94da13166a675c862b003_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8bf2a4e1bdbbbb85cbc3f4ab8af574e22449f18b
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af06c41208b31248e94da13166a675c862b003_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd391f9b00f9b2b254db7aa45c9f05c27e69daca9c67b23dd5f10ffcd9132842
+size 464715
diff --git a/demo_tasks/examples/relighting/02af9925c86c22b379e4e6d4f2762d66966ee281.jpg b/demo_tasks/examples/relighting/02af9925c86c22b379e4e6d4f2762d66966ee281.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5da28a9c46ec753876d1647f064522610c051f38
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af9925c86c22b379e4e6d4f2762d66966ee281.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f94e05f8f0a7d2f7abfe10d5d5d89e8e713c884367db7db9f2c4532d2b10f493
+size 929551
diff --git a/demo_tasks/examples/relighting/02af9925c86c22b379e4e6d4f2762d66966ee281_Left.jpg b/demo_tasks/examples/relighting/02af9925c86c22b379e4e6d4f2762d66966ee281_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ce2292cf2b925728e815b0dca9f4c05257def0d3
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af9925c86c22b379e4e6d4f2762d66966ee281_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdcdb600ec5e4990cd1656a7edcc1f7bb711f23d0592a5be03ebf7e63ce78e3a
+size 475931
diff --git a/demo_tasks/examples/relighting/02af99b6765a77a8f2ac87aa42d2f2453dcd590f.jpg b/demo_tasks/examples/relighting/02af99b6765a77a8f2ac87aa42d2f2453dcd590f.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a9be39fb13fe06d3c9f1231b50c20b5311d0a95a
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af99b6765a77a8f2ac87aa42d2f2453dcd590f.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72e04f01f848ae8feb21ee1979082651ea77ccbdf7150bec9661ba4045e77fe0
+size 565619
diff --git a/demo_tasks/examples/relighting/02af99b6765a77a8f2ac87aa42d2f2453dcd590f_Left.jpg b/demo_tasks/examples/relighting/02af99b6765a77a8f2ac87aa42d2f2453dcd590f_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2fe594c0b36ef46fc14d5c451c55853b1698a46b
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af99b6765a77a8f2ac87aa42d2f2453dcd590f_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6435043bcd8638390982b748af856193e552e1dfb8e615896b35092d3cc9d14
+size 392352
diff --git a/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010.jpg b/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c9328a4a97557734f6d78e598f1e849119f255fd
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:337c44ecf7af42cfd79411062470d127f484fa8ad04b347ca0847fc889679aac
+size 432416
diff --git a/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_Left.jpg b/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1bf526a8bdc1e87806532ec859fce22e50c05a3b
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae64e5c795077b54e469d7e1229d298120808897fb9c804af9db3fca7c851d67
+size 430070
diff --git a/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_depth.jpg b/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_depth.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..85a8e850c93637a2d494b4955603874923a1133e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_depth.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e25ef11d80a44b7da333f442603f661bd327ab1b05327fdb3b5ccf6d0b95013c
+size 135612
diff --git a/demo_tasks/examples/relighting/02afbcf084a1e35bda34c26d2271d56b6a1c621e.jpg b/demo_tasks/examples/relighting/02afbcf084a1e35bda34c26d2271d56b6a1c621e.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5f7d501d2142e5aca0cfaf44ad248b5a51b39f2e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02afbcf084a1e35bda34c26d2271d56b6a1c621e.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c5d2f171061a3b951dc90867604584693a4f394c5ec8484c59ac5881358fbe2
+size 591205
diff --git a/demo_tasks/examples/relighting/02afbcf084a1e35bda34c26d2271d56b6a1c621e_Left.jpg b/demo_tasks/examples/relighting/02afbcf084a1e35bda34c26d2271d56b6a1c621e_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3afd7e85744db34e580508fa58134dfb81d442f1
--- /dev/null
+++ b/demo_tasks/examples/relighting/02afbcf084a1e35bda34c26d2271d56b6a1c621e_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd6d1714402d1f0848950599ab17a4bebef61911abf922ff39b3e3464971f42
+size 516457
diff --git a/demo_tasks/examples/relighting/02b02e2916bf2eb3608f5a806dc3b7ecbed3b649.jpg b/demo_tasks/examples/relighting/02b02e2916bf2eb3608f5a806dc3b7ecbed3b649.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9d1c0232580cb9307bcea492df6adad6af8865e2
--- /dev/null
+++ b/demo_tasks/examples/relighting/02b02e2916bf2eb3608f5a806dc3b7ecbed3b649.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0918f43185de9b21bdf6caf1442bdd48dd5af7507551511439e62f1a821f9d31
+size 565320
diff --git a/demo_tasks/examples/relighting/02b02e2916bf2eb3608f5a806dc3b7ecbed3b649_Left.jpg b/demo_tasks/examples/relighting/02b02e2916bf2eb3608f5a806dc3b7ecbed3b649_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..10ed0b16da11765866c7c54c9db7afc524ae3c0e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02b02e2916bf2eb3608f5a806dc3b7ecbed3b649_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e58cc78cee85c74e067d585cab31888804271be5a2c6f4518775a14da5015bcc
+size 519047
diff --git a/demo_tasks/examples/relighting/02c02535d741e8800bf5fca514a85c29f7b4d115.jpg b/demo_tasks/examples/relighting/02c02535d741e8800bf5fca514a85c29f7b4d115.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fc57a9a6c722236bb5a4085fb4dfc352c77a9244
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c02535d741e8800bf5fca514a85c29f7b4d115.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abf386cccab77a065d4f462d8007cd2b101a5f1e89701ce6e6c5b2eba93e40b4
+size 596636
diff --git a/demo_tasks/examples/relighting/02c02535d741e8800bf5fca514a85c29f7b4d115_Left.jpg b/demo_tasks/examples/relighting/02c02535d741e8800bf5fca514a85c29f7b4d115_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..37a70eb238e82e846b92767c81e9c5fac808cbf7
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c02535d741e8800bf5fca514a85c29f7b4d115_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e5ba20d54a0e35ba815fb7a36f2ac5e2df8cbf26839d7857241d70868c750bc
+size 460036
diff --git a/demo_tasks/examples/relighting/02c032354830e33b94b50dee9a55113d91dae87b.jpg b/demo_tasks/examples/relighting/02c032354830e33b94b50dee9a55113d91dae87b.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..345deb6195ba7f262d4715565ba155430a4b422b
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c032354830e33b94b50dee9a55113d91dae87b.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c82e7dd37dd6b3e7747277b00b0a18b2f0c49e549628bd80e4bc341b778c87fe
+size 667545
diff --git a/demo_tasks/examples/relighting/02c032354830e33b94b50dee9a55113d91dae87b_Left.jpg b/demo_tasks/examples/relighting/02c032354830e33b94b50dee9a55113d91dae87b_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b91ee0b53deeed9b3894b8666711b0b65549853
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c032354830e33b94b50dee9a55113d91dae87b_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:431e586783b93e86cdf2db4a810d60bb9dadcee0f347f78f6de253dc447a3f10
+size 455059
diff --git a/demo_tasks/examples/relighting/02c0359af5386b96429b8239783921088e007347.jpg b/demo_tasks/examples/relighting/02c0359af5386b96429b8239783921088e007347.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b6837c678c4493c17a2901365805cbb34afe4ea
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c0359af5386b96429b8239783921088e007347.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d439b4f1d7e446a24c0bd0c0feacb087188cd33d3a4c8fb72b442a0c08d6c2b9
+size 645426
diff --git a/demo_tasks/examples/relighting/02c0359af5386b96429b8239783921088e007347_Left.jpg b/demo_tasks/examples/relighting/02c0359af5386b96429b8239783921088e007347_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..50e0a2e7b68ead5aa4aef81ae8d1421bf076a1f7
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c0359af5386b96429b8239783921088e007347_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d850cd8f80cb1da9ed4e48a9c07481f4d82c17291be2f21bd797100b7305ae
+size 449971
diff --git a/demo_tasks/examples/relighting/02c063fcc7f36018f6b55d26c6d4e05d43474d70.jpg b/demo_tasks/examples/relighting/02c063fcc7f36018f6b55d26c6d4e05d43474d70.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0b6cd8607478778595e5650c7938793afba1b514
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c063fcc7f36018f6b55d26c6d4e05d43474d70.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bef9b2895b53309c2869070c9817182a88b7d337900edcb437c7be04e110a51
+size 490431
diff --git a/demo_tasks/examples/relighting/02c063fcc7f36018f6b55d26c6d4e05d43474d70_Left.jpg b/demo_tasks/examples/relighting/02c063fcc7f36018f6b55d26c6d4e05d43474d70_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9fd1a814363ac16ac94be8a9a49034e6aaa52fc5
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c063fcc7f36018f6b55d26c6d4e05d43474d70_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2d4315c70ef299da1dbd26f62d7dca042e98db476709312cdca5af259523972
+size 476324
diff --git a/demo_tasks/examples/relighting/02c3760bf08f00d9e2163248e2864f5e1a70d709.jpg b/demo_tasks/examples/relighting/02c3760bf08f00d9e2163248e2864f5e1a70d709.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ee3f94277225a80914cd5fe4b0551188a5fa1d83
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c3760bf08f00d9e2163248e2864f5e1a70d709.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:618b462cc0752fd73971acf48eb420c29aa0e8c824d9b6b2d15fce53494b247b
+size 929759
diff --git a/demo_tasks/examples/relighting/02c3760bf08f00d9e2163248e2864f5e1a70d709_Left.jpg b/demo_tasks/examples/relighting/02c3760bf08f00d9e2163248e2864f5e1a70d709_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..38a5566676f134d826383e1e3f4ce43baf1ddbe2
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c3760bf08f00d9e2163248e2864f5e1a70d709_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64d648bb6de4918025db8bd93d8ee8614bc30b607bacf498dbb080406dcc9b1a
+size 504424
diff --git a/demo_tasks/examples/relighting/02c39e8e82f4be91d24252c8bfbfdef033ec8a32.jpg b/demo_tasks/examples/relighting/02c39e8e82f4be91d24252c8bfbfdef033ec8a32.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7218fc47b101b2e56c509bbbd576d3e0bc7fbf23
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c39e8e82f4be91d24252c8bfbfdef033ec8a32.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:776d3b64b906dde25bf6134db4693aa6b844eadf47790f72c991b53af51e90a7
+size 577875
diff --git a/demo_tasks/examples/relighting/02c39e8e82f4be91d24252c8bfbfdef033ec8a32_Left.jpg b/demo_tasks/examples/relighting/02c39e8e82f4be91d24252c8bfbfdef033ec8a32_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ca180bcdec665572f06d543deaec6666b20d8b44
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c39e8e82f4be91d24252c8bfbfdef033ec8a32_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c6ea7ab2d327b7982f6b2e114ef050881feb85e569f983a406213bf73d6faf6
+size 482183
diff --git a/demo_tasks/examples/relighting/02c466ad25faefb2a8bbe84d20d5ca1beb323a6d.jpg b/demo_tasks/examples/relighting/02c466ad25faefb2a8bbe84d20d5ca1beb323a6d.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a72c9a31f6eded9154bb82401d5630705f328e0d
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c466ad25faefb2a8bbe84d20d5ca1beb323a6d.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:827c5b01449c11af0b464f7acfea29b854cfa69cd814b465b3bfc514ca007e0e
+size 955494
diff --git a/demo_tasks/examples/relighting/02c466ad25faefb2a8bbe84d20d5ca1beb323a6d_Left.jpg b/demo_tasks/examples/relighting/02c466ad25faefb2a8bbe84d20d5ca1beb323a6d_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6289d9ff0345bccc5f75b0a339525af43c8a6809
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c466ad25faefb2a8bbe84d20d5ca1beb323a6d_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aab583acd1954a7e32d1df83170a2a8fabea56f4bb6dede5aa14ae2059a27c2e
+size 532258
diff --git a/demo_tasks/examples/relighting/02c5200cac1d0f19256232a09708ac47f6ddfab3.jpg b/demo_tasks/examples/relighting/02c5200cac1d0f19256232a09708ac47f6ddfab3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..315a08435550086aa0d1028c18f7ba73ab3e112b
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c5200cac1d0f19256232a09708ac47f6ddfab3.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5377aaae464c5e71bc1815fb7c3f2c9aa59f7f683883ba79c8b462e369257a8
+size 769982
diff --git a/demo_tasks/examples/relighting/02c5200cac1d0f19256232a09708ac47f6ddfab3_Left.jpg b/demo_tasks/examples/relighting/02c5200cac1d0f19256232a09708ac47f6ddfab3_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a4b9fc39de76255d3b7c271f38e198860f838598
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c5200cac1d0f19256232a09708ac47f6ddfab3_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afd2e98f6e836cc7da88a7965f0adb84b4e89f88f90ca2fa2511ade8287ff0c9
+size 531080
diff --git a/demo_tasks/examples/relighting/02c53f12ec3d4a9a16d9b0ca3f7773ad2222100c.jpg b/demo_tasks/examples/relighting/02c53f12ec3d4a9a16d9b0ca3f7773ad2222100c.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fed843ce76903b2082e746fa64ed18a8afae9159
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c53f12ec3d4a9a16d9b0ca3f7773ad2222100c.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6999c651b6b64abc73a7f76b306a9a29524db8fee51ec9ec6ebeacc9730e8be3
+size 641304
diff --git a/demo_tasks/examples/relighting/02c53f12ec3d4a9a16d9b0ca3f7773ad2222100c_Left.jpg b/demo_tasks/examples/relighting/02c53f12ec3d4a9a16d9b0ca3f7773ad2222100c_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a163b60fec0eae4e6cb903bfbaca9756e6bcdb14
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c53f12ec3d4a9a16d9b0ca3f7773ad2222100c_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d05b665cfaac3854ac3b0fb114005e6635dad23b5f2b8eb2e85ef3a1efdf2d22
+size 613045
diff --git a/demo_tasks/examples/relighting/02c5cc03d46ce15494caaf3d65a2b2c7e09089f2.jpg b/demo_tasks/examples/relighting/02c5cc03d46ce15494caaf3d65a2b2c7e09089f2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1a7533b2427c1b47a9ed18975d397ea133d312fc
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c5cc03d46ce15494caaf3d65a2b2c7e09089f2.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d846e72768226cca5d0a81e506fcf0dff691ac6d5c8bf87c83a05374ee222e35
+size 270366
diff --git a/demo_tasks/examples/relighting/02c5cc03d46ce15494caaf3d65a2b2c7e09089f2_Left.jpg b/demo_tasks/examples/relighting/02c5cc03d46ce15494caaf3d65a2b2c7e09089f2_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5d4b04e12f168d6e6e7102200c24baed1e11790e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c5cc03d46ce15494caaf3d65a2b2c7e09089f2_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b092051b9d2bdf360e1b7f6a0ba0e84238c2635233f1834f9b4eceb3971365d7
+size 433674
diff --git a/demo_tasks/examples/relighting/02c6c0f92a672110ff86bd12f4aa0d0083c9cf6b.jpg b/demo_tasks/examples/relighting/02c6c0f92a672110ff86bd12f4aa0d0083c9cf6b.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cd73dbbbc1e1b9cdc1ee95e83d6f19147d215a1e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c6c0f92a672110ff86bd12f4aa0d0083c9cf6b.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2388d093bc199fa3f1a836960f086e22ba597500d608ebf528b66b70d0bb12e
+size 569457
diff --git a/demo_tasks/examples/relighting/02c6c0f92a672110ff86bd12f4aa0d0083c9cf6b_Left.jpg b/demo_tasks/examples/relighting/02c6c0f92a672110ff86bd12f4aa0d0083c9cf6b_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9d947baa4ebde1ff7e170e8e8a584a29964d79a6
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c6c0f92a672110ff86bd12f4aa0d0083c9cf6b_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9475309c010724c542a2da80532345f9a2a13022d3aa2f30e87055395c11e74d
+size 525885
diff --git a/demo_tasks/examples/relighting/02c73157a981e0ee669ca8125018efbdda1e1483.jpg b/demo_tasks/examples/relighting/02c73157a981e0ee669ca8125018efbdda1e1483.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5173ce077cff9acb1dbd6be3226054dfa5c3eb64
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c73157a981e0ee669ca8125018efbdda1e1483.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8de579b4f4d1928e677834949a2d9fccd6d4d96d6a370332b10df0d20573902f
+size 484347
diff --git a/demo_tasks/examples/relighting/02c73157a981e0ee669ca8125018efbdda1e1483_Left.jpg b/demo_tasks/examples/relighting/02c73157a981e0ee669ca8125018efbdda1e1483_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..921d92b33982a91db9fa2d14d51b582d850c860d
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c73157a981e0ee669ca8125018efbdda1e1483_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fff1c1734bb57658633f4cb2a7838a1e81652f77566494bd00274272afcb926d
+size 397655
diff --git a/demo_tasks/examples/relighting/02c77b643fbdaec82912634655426553f3d7a537.jpg b/demo_tasks/examples/relighting/02c77b643fbdaec82912634655426553f3d7a537.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b4baf9ae2cfb110e67cc335dbdaeb4a308d38529
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c77b643fbdaec82912634655426553f3d7a537.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3bf60996e374a90b1091a7e195d0dd37959dad68e4d78c434c2a505d3920a60
+size 374694
diff --git a/demo_tasks/examples/relighting/02c77b643fbdaec82912634655426553f3d7a537_Left.jpg b/demo_tasks/examples/relighting/02c77b643fbdaec82912634655426553f3d7a537_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..51e8efd406e926d1f5da5d8e4c274c7586119dab
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c77b643fbdaec82912634655426553f3d7a537_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:991ade2d5542eb50ebe48e9732d94242da26b3dac0d78ea445c4e23cd0919fee
+size 473118
diff --git a/demo_tasks/examples/relighting/02c7df6c0decd3d542e25089a0af6afe1e070b6a.jpg b/demo_tasks/examples/relighting/02c7df6c0decd3d542e25089a0af6afe1e070b6a.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a49d016a3e7cfea9b52a48f1cde57a430fcc701e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c7df6c0decd3d542e25089a0af6afe1e070b6a.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae04552ff9c4e99199bd1e6372298e90e3196590e8a4b23874cf563c88722d8c
+size 718130
diff --git a/demo_tasks/examples/relighting/02c7df6c0decd3d542e25089a0af6afe1e070b6a_Left.jpg b/demo_tasks/examples/relighting/02c7df6c0decd3d542e25089a0af6afe1e070b6a_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..29ee7a4b0ac71a11a53cdaeab98d00d8447c9376
--- /dev/null
+++ b/demo_tasks/examples/relighting/02c7df6c0decd3d542e25089a0af6afe1e070b6a_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a92af116be7134f9d1f95e91d71ce3d667479764f07b91ed120e4b001c10ce1a
+size 501835
diff --git a/demo_tasks/examples/relighting/02daa50ac59bb9eabcbe0d5304af880d941bffc3.jpg b/demo_tasks/examples/relighting/02daa50ac59bb9eabcbe0d5304af880d941bffc3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7602107a76d279ade1d605e4c7d9004deb4242b4
--- /dev/null
+++ b/demo_tasks/examples/relighting/02daa50ac59bb9eabcbe0d5304af880d941bffc3.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1709e775b890e0b06b377ab00fed8a50fce23cabc194e59c542fe6ff178c2a24
+size 533869
diff --git a/demo_tasks/examples/relighting/02daa50ac59bb9eabcbe0d5304af880d941bffc3_Left.jpg b/demo_tasks/examples/relighting/02daa50ac59bb9eabcbe0d5304af880d941bffc3_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..421576c99545b72a91d3c4fa018e65a3cda6af0e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02daa50ac59bb9eabcbe0d5304af880d941bffc3_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:252f994aa5b54e5a4c4b2f1a9accc6c83b3b5e7239d5e6318a84c1e3fa3b97d8
+size 426092
diff --git a/demo_tasks/examples/relighting/02daa93f44c11ebbe5942cc0b781c12e125a6c65.jpg b/demo_tasks/examples/relighting/02daa93f44c11ebbe5942cc0b781c12e125a6c65.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3346bfc30ea46e1a4fb45b02d71b992bdf23abff
--- /dev/null
+++ b/demo_tasks/examples/relighting/02daa93f44c11ebbe5942cc0b781c12e125a6c65.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69c3775ea0f714f4dde96933c4940b22ad06c5ca1c710604f6b2362c1093391a
+size 508306
diff --git a/demo_tasks/examples/relighting/02daa93f44c11ebbe5942cc0b781c12e125a6c65_Left.jpg b/demo_tasks/examples/relighting/02daa93f44c11ebbe5942cc0b781c12e125a6c65_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b059573f99ce639e49d48f31c672b00b2ffcd756
--- /dev/null
+++ b/demo_tasks/examples/relighting/02daa93f44c11ebbe5942cc0b781c12e125a6c65_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86d3a160e84c00a2749220bcd798f69eb50638dcf1a7904b180f8c4cd10c2d4f
+size 356820
diff --git a/demo_tasks/examples/relighting/02dac14fc596c51077015d280ea564faa1f735b5.jpg b/demo_tasks/examples/relighting/02dac14fc596c51077015d280ea564faa1f735b5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..39744104191a293fecdc8df5fc918265cc085120
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dac14fc596c51077015d280ea564faa1f735b5.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:175709997045c8391f0bfb58a5093e0c822cf34642bc99f8fd6ecf37aa751484
+size 487979
diff --git a/demo_tasks/examples/relighting/02dac14fc596c51077015d280ea564faa1f735b5_Left.jpg b/demo_tasks/examples/relighting/02dac14fc596c51077015d280ea564faa1f735b5_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..29c27eef02fe49c2313764eae3e3a2338f58497c
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dac14fc596c51077015d280ea564faa1f735b5_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5782281797085ba7eec4aba6fbaea7efdafb4c18dfc83d9b35e1740a837b07b7
+size 460392
diff --git a/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6.jpg b/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..67eb969176229f514e5b446b0a0dc67f84b4d192
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ad2881b25d0a702a00b732da6ab90fb613f3e28c1575703bb27e4fd86533e26
+size 746960
diff --git a/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_Left.jpg b/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ae9d06e0b786291e2e03195572cc8074d32dfaed
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d87047dfb1f6ddb8d1adf417c052e904e58f7f500df3d1e8075dbd7458cf046
+size 545333
diff --git a/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_depth.jpg b/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_depth.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2ec626192c5e4a8b6a6dc4cf0e4eb412bba98928
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_depth.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b741a579d541138774d418ec6da02fc96df4f3215ebf26d9a2737d96b5961d72
+size 139658
diff --git a/demo_tasks/examples/relighting/02dafead46f6d59172d8df216c1f5ad11f9899b5.jpg b/demo_tasks/examples/relighting/02dafead46f6d59172d8df216c1f5ad11f9899b5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..522d1daa10d513153c44f3c68090fb75b610ae2b
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dafead46f6d59172d8df216c1f5ad11f9899b5.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:267e93b9ea2deb103eff56367e7e90f7d5c09d50d4468105cab84473f46f9c1d
+size 819457
diff --git a/demo_tasks/examples/relighting/02dafead46f6d59172d8df216c1f5ad11f9899b5_Left.jpg b/demo_tasks/examples/relighting/02dafead46f6d59172d8df216c1f5ad11f9899b5_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5f81026c8bde39226a86c222c880334f23431edb
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dafead46f6d59172d8df216c1f5ad11f9899b5_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f89b1e4f166749a4aaa3a9da6cc5f43ffa8963dd544dc7fa2bc22178c5e85cd8
+size 519294
diff --git a/demo_tasks/examples/relighting/02db22466eb3bc19d6a10195e1b48fff696c1582.jpg b/demo_tasks/examples/relighting/02db22466eb3bc19d6a10195e1b48fff696c1582.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2577751f06ef485087692155ac034673310e29f4
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db22466eb3bc19d6a10195e1b48fff696c1582.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c64173cb6e7206474cbc1266d01cebb5cba9614434896c0f186db48f5982d2f7
+size 450386
diff --git a/demo_tasks/examples/relighting/02db22466eb3bc19d6a10195e1b48fff696c1582_Left.jpg b/demo_tasks/examples/relighting/02db22466eb3bc19d6a10195e1b48fff696c1582_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..151650fb16bf92a268ba8cb9b9d91f515f60454f
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db22466eb3bc19d6a10195e1b48fff696c1582_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a48109f6d2df65327d88255cc9f6e03d616a252d3ad4527eb2f9606fd50f78a7
+size 399226
diff --git a/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c.jpg b/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e1a1108cc4e3841f6cf6ae5a92d85b353116ea6
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5aedbfec63b1545ab841fa28df6079914d5f3d1932d4de3bddad5cde9ebba1d1
+size 648645
diff --git a/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c_Left.jpg b/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2f4238f08658b37a04ad760adf2e641ff3d74ba6
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bccb137d42e3bd82b8a75c6a831e2a998be7ffc6cefbef0fd508436a31825f1d
+size 420519
diff --git a/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c_depth.jpg b/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c_depth.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..04d578ed78f3fdbb6362745ac6ec083f9e47ef6f
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c_depth.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b12d82599dab8914ea691ce6b4da2b249ad85d06bde2ec1391f38bcef4cb7da2
+size 149687
diff --git a/demo_tasks/examples/relighting/02db5a81c222483058fecd76d62c5f7246b06ee4.jpg b/demo_tasks/examples/relighting/02db5a81c222483058fecd76d62c5f7246b06ee4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9d15a940a97b77a1474b43cede4aad507e497da5
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db5a81c222483058fecd76d62c5f7246b06ee4.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec7788f110bdb16541d20e9fc9799e7886e2e40e3bf3b1610b9dc226aa1f5257
+size 1098779
diff --git a/demo_tasks/examples/relighting/02db5a81c222483058fecd76d62c5f7246b06ee4_Left.jpg b/demo_tasks/examples/relighting/02db5a81c222483058fecd76d62c5f7246b06ee4_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7c4d1c392aab048764fcb3b3b9ccbff8f0e8818d
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db5a81c222483058fecd76d62c5f7246b06ee4_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64efc98e281f4924591328b5c32b745ccb60af45c8e65c0d08957c93f23c30f4
+size 607174
diff --git a/demo_tasks/examples/relighting/02db80670789cc6722f78747cf6ab8c292a898ab.jpg b/demo_tasks/examples/relighting/02db80670789cc6722f78747cf6ab8c292a898ab.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4361c10f8ce2426a3fa34f35e9fb3701b20e89f8
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db80670789cc6722f78747cf6ab8c292a898ab.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf36f4edbb55e4ef4acf1832b6d536c33ea3b54f5ef7aae0a1776a1b00baeae9
+size 688251
diff --git a/demo_tasks/examples/relighting/02db80670789cc6722f78747cf6ab8c292a898ab_Left.jpg b/demo_tasks/examples/relighting/02db80670789cc6722f78747cf6ab8c292a898ab_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ea6ec8a33e896e73a65d5cd29ad5e484b8101198
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db80670789cc6722f78747cf6ab8c292a898ab_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7251ef134f4ab69754388e694caf235d386e70db45dd097953a24ac0dbae435f
+size 676473
diff --git a/demo_tasks/examples/relighting/02db8a5f38464943d496bd3b475c36a3d65e7095.jpg b/demo_tasks/examples/relighting/02db8a5f38464943d496bd3b475c36a3d65e7095.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..99f7325507947c3cb8367b92f299406b92164096
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db8a5f38464943d496bd3b475c36a3d65e7095.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7187173e71c256381a56ec7e9175facd1f052e9bd7220fe692ed77c5cc4aab31
+size 680341
diff --git a/demo_tasks/examples/relighting/02db8a5f38464943d496bd3b475c36a3d65e7095_Left.jpg b/demo_tasks/examples/relighting/02db8a5f38464943d496bd3b475c36a3d65e7095_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9808df1888a2521b940474369759bc23ad1fdd81
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db8a5f38464943d496bd3b475c36a3d65e7095_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87744587031eb5b9287198195dcd8feceb68edf3c509da767055dc16427d9fef
+size 588417
diff --git a/demo_tasks/examples/relighting/02db96d3ce2531dc4d51dda52492b78cf3577c56.jpg b/demo_tasks/examples/relighting/02db96d3ce2531dc4d51dda52492b78cf3577c56.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..79bad43a2e5ac3041ab2b9dd9d044fc8ef23177e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db96d3ce2531dc4d51dda52492b78cf3577c56.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8c4282d2f64845cf4365bbc2fa62c3786cc056b45e7ea029b0dac8ab40fafd2
+size 429074
diff --git a/demo_tasks/examples/relighting/02db96d3ce2531dc4d51dda52492b78cf3577c56_Left.jpg b/demo_tasks/examples/relighting/02db96d3ce2531dc4d51dda52492b78cf3577c56_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..79b28656f556d8e593c508d64d8f7fe5a31656e7
--- /dev/null
+++ b/demo_tasks/examples/relighting/02db96d3ce2531dc4d51dda52492b78cf3577c56_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0083fe59696f2640cba359ae28608e8dd5a19a2facc6525a78b1a698e5d3835c
+size 409539
diff --git a/demo_tasks/examples/relighting/02dc3e2cf9541a7d7ebff79cbf1fb0d95b4911e8.jpg b/demo_tasks/examples/relighting/02dc3e2cf9541a7d7ebff79cbf1fb0d95b4911e8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2e2987bf21f96dcd49a40fc7c0c6511b20760c59
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dc3e2cf9541a7d7ebff79cbf1fb0d95b4911e8.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:317f39e17541fe8a3a972210fb20c02138fa259f1ff98537b09ea524c0a12e2b
+size 847281
diff --git a/demo_tasks/examples/relighting/02dc3e2cf9541a7d7ebff79cbf1fb0d95b4911e8_Left.jpg b/demo_tasks/examples/relighting/02dc3e2cf9541a7d7ebff79cbf1fb0d95b4911e8_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a95edbcf8a3dbfc0abe79f2b921d945c06f4a868
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dc3e2cf9541a7d7ebff79cbf1fb0d95b4911e8_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac4f06ddf15f3e176b1f2e378473a5fd3e602f19ee2f483bb9242613a0e3ef3d
+size 580365
diff --git a/demo_tasks/examples/relighting/02dc42496c4ffdb2a8e101ed82943b26fc2d9d24.jpg b/demo_tasks/examples/relighting/02dc42496c4ffdb2a8e101ed82943b26fc2d9d24.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0a57f4fefe14d15fdd27627732ad5686a9149c07
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dc42496c4ffdb2a8e101ed82943b26fc2d9d24.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a34f5f2744bd053f8a3390030aba97d361518fbb97bceaf1342d87ebe89f3698
+size 673138
diff --git a/demo_tasks/examples/relighting/02dc42496c4ffdb2a8e101ed82943b26fc2d9d24_Left.jpg b/demo_tasks/examples/relighting/02dc42496c4ffdb2a8e101ed82943b26fc2d9d24_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3f879fdb962bc0c5ccccbd3a3f76a9826ce11425
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dc42496c4ffdb2a8e101ed82943b26fc2d9d24_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebaa6cbb0436dafdfba4f578f5fd0afa8902880ee279b40889f958705f9b3ad9
+size 464285
diff --git a/demo_tasks/examples/relighting/02dc4ebfd90dc80dbc0f4174679ff3828605ec9c.jpg b/demo_tasks/examples/relighting/02dc4ebfd90dc80dbc0f4174679ff3828605ec9c.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0620e62bd256be60d2eac64bde38fbae275c3cb6
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dc4ebfd90dc80dbc0f4174679ff3828605ec9c.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3135f34df6c674fcb4fa44658771414ec75ce689613078fb6106e3c9034419c7
+size 500242
diff --git a/demo_tasks/examples/relighting/02dc4ebfd90dc80dbc0f4174679ff3828605ec9c_Left.jpg b/demo_tasks/examples/relighting/02dc4ebfd90dc80dbc0f4174679ff3828605ec9c_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3b51d01c8fa385737c91c9f7c0e463d1218941db
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dc4ebfd90dc80dbc0f4174679ff3828605ec9c_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d063735b6a8307687c057c593893faea3c317a6ef372182c11a00795db40fab3
+size 489179
diff --git a/demo_tasks/examples/relighting/02dc6ca122863a582306a4f146b7bccb721a49e0.jpg b/demo_tasks/examples/relighting/02dc6ca122863a582306a4f146b7bccb721a49e0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d24366c6521a0cfd322643d9a21c13debfe8986d
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dc6ca122863a582306a4f146b7bccb721a49e0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f046d6b38405d7b184798c39421b02d20d2edcbd5f026c6aef1e6b25ab0465c4
+size 467486
diff --git a/demo_tasks/examples/relighting/02dc6ca122863a582306a4f146b7bccb721a49e0_Left.jpg b/demo_tasks/examples/relighting/02dc6ca122863a582306a4f146b7bccb721a49e0_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ca84003edc7b6d4c48cd35094915b0af711a2e9c
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dc6ca122863a582306a4f146b7bccb721a49e0_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13ad7983c4e9d5cec97035617b700f95805797b16e40c3a58096eab7f810b079
+size 475745
diff --git a/demo_tasks/examples/relighting/02dca7ccfad757fd596d33563d06b3ab7836d5af.jpg b/demo_tasks/examples/relighting/02dca7ccfad757fd596d33563d06b3ab7836d5af.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..199813ab9bfa49c144b7ff3eca8622c14f501a70
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dca7ccfad757fd596d33563d06b3ab7836d5af.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49586eff1f72c87a887eb8b180e6c8bc261356df5b4b3fa461082215883c55b0
+size 762248
diff --git a/demo_tasks/examples/relighting/02dca7ccfad757fd596d33563d06b3ab7836d5af_Left.jpg b/demo_tasks/examples/relighting/02dca7ccfad757fd596d33563d06b3ab7836d5af_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e048690d5519dc1f9ff548b3e8ab53f298c5c321
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dca7ccfad757fd596d33563d06b3ab7836d5af_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:659218378f73772dad278e5ef800ebf0ba25a7da9012ff5169ddca69a8eb39f4
+size 509237
diff --git a/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8.jpg b/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d1fefd47c2a62e249e79798eaa93008c990b64e4
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a33293056f946e28ead4dee06cca620214a61443566a84f9f79a4f6515cdfef
+size 798678
diff --git a/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_Left.jpg b/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6cc7aa477e1ab61edee830caa074c1cbb94400f7
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3121c768f74edf869d999bea78401d2a50651cfb62f037bc1c4584ec35f5e3f
+size 465594
diff --git a/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_depth.jpg b/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_depth.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..28b47dc7d0d2b8cea432b3f4bf07614185cbeb0d
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_depth.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94a660cd9e8cb4a8af6bf233ab4e7280defe636a7d8ae5ed6f97bae2c647399a
+size 120716
diff --git a/demo_tasks/examples/relighting/02dcd82122ffe344c8d7c289dc770febb5121153.jpg b/demo_tasks/examples/relighting/02dcd82122ffe344c8d7c289dc770febb5121153.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b91d73a67888323a2325a0f32623398f85e87b1c
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dcd82122ffe344c8d7c289dc770febb5121153.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27dd060d2275d4e4de91e575702685cf7d89888eeafb84e46e656e7cac3222e2
+size 600295
diff --git a/demo_tasks/examples/relighting/02dcd82122ffe344c8d7c289dc770febb5121153_Left.jpg b/demo_tasks/examples/relighting/02dcd82122ffe344c8d7c289dc770febb5121153_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ddcac165bc601cbf4b6fd2c52501ab3199281492
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dcd82122ffe344c8d7c289dc770febb5121153_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aad8670041b06806b741695b7e370884ed10c15d9f2985573e0cee48713fe115
+size 448653
diff --git a/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4.jpg b/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c6cbc70258d1617fd2d438d0ea308ad12e8afd36
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:233759630ba00778d1064d8afa792a29bc3920371de24886a97b8b5319e928fd
+size 410542
diff --git a/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_Left.jpg b/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8dafaa865c473014a77303b9c06d49cc1edd8f4d
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a557003d98a92ae58f27bb100b8dc240bd22e7747853c93540fa1a99dc8d5c8
+size 458838
diff --git a/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_depth.jpg b/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_depth.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..930bf7c405f5af38945a0419b223bc6a502bde53
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_depth.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa47d1541ee88869cde084ffdb19601d328a2cfd1b1855a1c237a250aa234266
+size 144574
diff --git a/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657.jpg b/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b566ed6dd1a5af9f87af5ec6b2aa0cc6d5ebacd6
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:324e5643803465d5087b36482acf3db1e538dc50160a403a3ee06ebae56a7263
+size 947801
diff --git a/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_Left.jpg b/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..76ef7ff84de90c06a39e52984a6527a9d76cfcaa
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b382a863a59a32a36fad53aab90dd8d908867497758d642c01cb30de78936d36
+size 455281
diff --git a/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_depth.jpg b/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_depth.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d212ad4c9b04b2135e9f591bd2fd1f5a067ed281
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_depth.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71688ca99fcb7044781e2645d730b3afa1a700e0a05f8b85ca789a18b70fa888
+size 125769
diff --git a/demo_tasks/examples/relighting/02dd6a91d0d1d17a9f06e999654b541b555da242.jpg b/demo_tasks/examples/relighting/02dd6a91d0d1d17a9f06e999654b541b555da242.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2a3f53622f5ff7cf2fbf13e3052fa4b2b9790bd1
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd6a91d0d1d17a9f06e999654b541b555da242.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1637ef1b92e3d2f20e61c53094129aca1d780cefbd489f4d935dd18c248a597
+size 543184
diff --git a/demo_tasks/examples/relighting/02dd6a91d0d1d17a9f06e999654b541b555da242_Left.jpg b/demo_tasks/examples/relighting/02dd6a91d0d1d17a9f06e999654b541b555da242_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9df85ca839f62b8c0cab36f766f19d3fc67f4899
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd6a91d0d1d17a9f06e999654b541b555da242_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6eda073c04a7c8edf5db489ba389ee866a47e6e2312875a8b7434db18a5af5c1
+size 349193
diff --git a/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c.jpg b/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b95518ea681413bd7f29035d4fdc7ae351372afd
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b08a1993f9624fbed4b7871af4f11aa586b9e6dafe345bf2e06f63d392b5556
+size 953469
diff --git a/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c_Left.jpg b/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..37a3b4c9feae027816f7fe3eab689c8d32ab9088
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf36c8b46f8c1e83465fff673ec8efd4b0cb4db8315d71ca4273bcb4cdd4a26
+size 484092
diff --git a/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c_Left_2.jpg b/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c_Left_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fe13da1aac394ce41312fa27cbfd090a6cba380a
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c_Left_2.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8becad40d8307c3c91f267fdd1f9c1a317d4350d5b34d7c4376af0eb37c1f195
+size 189630
diff --git a/demo_tasks/examples/relighting/02dd79a669a4522f1d5631d75c14243f927848b8.jpg b/demo_tasks/examples/relighting/02dd79a669a4522f1d5631d75c14243f927848b8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bc77db4dc97c1ce1630723c7c19f3afd72842c97
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd79a669a4522f1d5631d75c14243f927848b8.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6faed85797a5eab8dcf8a969ff8e8a07cdc427dc0eab8b0629cbc21766d67b4a
+size 339045
diff --git a/demo_tasks/examples/relighting/02dd79a669a4522f1d5631d75c14243f927848b8_Left.jpg b/demo_tasks/examples/relighting/02dd79a669a4522f1d5631d75c14243f927848b8_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1633d50974ed423858a5c37169f5e18f956576a5
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd79a669a4522f1d5631d75c14243f927848b8_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfbfdb3eb72ee117e7fa29910f3dbe5cc5ed3917d7376d034265ca39bd116093
+size 478405
diff --git a/demo_tasks/examples/relighting/02dd9913f85a62d9c1587b00f610cc753ebad649.jpg b/demo_tasks/examples/relighting/02dd9913f85a62d9c1587b00f610cc753ebad649.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..29bd54861093acf1ccccadd178158b2194e4497e
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd9913f85a62d9c1587b00f610cc753ebad649.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5e4cd660fe25ba5f14c98fbe30de2ef10daf21ce72f1c2864c9cb47a8dd7adf
+size 347773
diff --git a/demo_tasks/examples/relighting/02dd9913f85a62d9c1587b00f610cc753ebad649_Left.jpg b/demo_tasks/examples/relighting/02dd9913f85a62d9c1587b00f610cc753ebad649_Left.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b8edcfa43afd062e880c15b08c400d8cab2d6740
--- /dev/null
+++ b/demo_tasks/examples/relighting/02dd9913f85a62d9c1587b00f610cc753ebad649_Left.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91cd31ca17466e21f5b3057864c7eb9692f5d7edb3d9bf62479759852e852660
+size 562595
diff --git a/demo_tasks/examples/tryon/00555_00.jpg b/demo_tasks/examples/tryon/00555_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..379d3729a16933c5770f12cd29057ec5c74c783a
--- /dev/null
+++ b/demo_tasks/examples/tryon/00555_00.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c42ed839726ac35019242fdf24aecbf19a03b30bca562af617c2d85acc5e7e9
+size 76216
diff --git a/demo_tasks/examples/tryon/00555_00_tryon_catvton_0.jpg b/demo_tasks/examples/tryon/00555_00_tryon_catvton_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..895bb713ca2e409ee357c0ab6d054add9f620b19
--- /dev/null
+++ b/demo_tasks/examples/tryon/00555_00_tryon_catvton_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3eae6aa858337bb2d5b54411a85c0eb42568045a7688474ec31b825774d6a59
+size 108294
diff --git a/demo_tasks/examples/tryon/00700_00.jpg b/demo_tasks/examples/tryon/00700_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..34d940e0782e4fb94758c83947bb91d726631fa3
--- /dev/null
+++ b/demo_tasks/examples/tryon/00700_00.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e35ebbbd6aeee80cd57d95045a867a19fbe4e780c6ddb0aa98f5b97d0a4c96e2
+size 106583
diff --git a/demo_tasks/examples/tryon/00700_00_tryon_catvton_0.jpg b/demo_tasks/examples/tryon/00700_00_tryon_catvton_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a379c71b9e891d0ab93f3d2f3efbe0f5203c6bee
--- /dev/null
+++ b/demo_tasks/examples/tryon/00700_00_tryon_catvton_0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c50b271342ba91d92d2bc448a0030bd7165b7665c4cef6ac77536010ede4dbb1
+size 87827
diff --git a/demo_tasks/examples/tryon/03673_00.jpg b/demo_tasks/examples/tryon/03673_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..461f4f62bb39feafce1100eb16274895166f5bd5
--- /dev/null
+++ b/demo_tasks/examples/tryon/03673_00.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ac66886bdf6de03dc0ba5880be2be8f680ac06b1a419851df243472b5b49229
+size 80924
diff --git a/demo_tasks/examples/tryon/12265_00.jpg b/demo_tasks/examples/tryon/12265_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8a30f22d2b92980900b7ac49bb8d42ccbe0fbf77
--- /dev/null
+++ b/demo_tasks/examples/tryon/12265_00.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:498b20da5f87e17e504510f3271851e99f1d96b42b98ea72d4d3ea653e16d009
+size 42793
diff --git a/demo_tasks/gradio_tasks.py b/demo_tasks/gradio_tasks.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4a091b1193ab1c011954bed194d49bd2f89d272
--- /dev/null
+++ b/demo_tasks/gradio_tasks.py
@@ -0,0 +1,190 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+import numpy as np
+
+
+def generate_paths_from_id(file_id: str, prompt: str) -> dict:
+ """
+ 根据文件ID自动生成所有相关文件的路径
+
+ Args:
+ file_id: str - 文件的唯一标识符 (例如: '5c79f1ea582c3faa093d2e09b906321d')
+
+ Returns:
+ dict: 包含所有生成路径的字典
+ """
+ base_path = 'demo_tasks/examples'
+
+ paths = {
+ 'target': f'{base_path}/{file_id}/{file_id}.jpg',
+ 'depth': f'{base_path}/{file_id}/{file_id}_depth-anything-v2_Large.jpg',
+ 'canny': f'{base_path}/{file_id}/{file_id}_canny_100_200_512.jpg',
+ 'hed': f'{base_path}/{file_id}/{file_id}_hed_512.jpg',
+ 'normal': f'{base_path}/{file_id}/{file_id}_dsine_normal_map.jpg',
+ 'openpose': f'{base_path}/{file_id}/{file_id}_openpose_fullres_nohand.jpg',
+ 'style_target': f'{base_path}/{file_id}/{file_id}_instantx-style_0.jpg',
+ 'style_source': f'{base_path}/{file_id}/{file_id}_instantx-style_0_style.jpg',
+ 'foreground': f'{base_path}/{file_id}/{file_id}_ben2-background-removal.jpg',
+ 'background': f'{base_path}/{file_id}/{file_id}_ben2-background-removal.jpg',
+ 'mask': f'{base_path}/{file_id}/{file_id}_qwen2_5_mask.jpg',
+ 'sam2_mask': f'{base_path}/{file_id}/{file_id}_sam2_mask.jpg',
+ 'prompt': prompt
+ }
+
+ return paths
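+
+# Minimal usage sketch ('abc123' is a hypothetical id, not an asset shipped in
+# this repo). The helper only assembles path strings; it does not check that
+# the files actually exist:
+#   paths = generate_paths_from_id('abc123', prompt='A photo.')
+#   paths['target']  # -> 'demo_tasks/examples/abc123/abc123.jpg'
+#   paths['canny']   # -> 'demo_tasks/examples/abc123/abc123_canny_100_200_512.jpg'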
+
+
+dense_prediction_data = [
+ generate_paths_from_id('2b74476568f7562a6aa832d423132ed3', prompt="Group photo of five young adults enjoying a rooftop gathering at dusk. The group is positioned in the center, with three women and two men smiling and embracing. The woman on the far left wears a floral top and holds a drink, looking slightly to the right. Next to her, a woman in a denim jacket stands close to a woman in a white blouse, both smiling directly at the camera. The fourth woman, in an orange top, stands close to the man on the far right, who wears a red shirt and blue blazer, smiling broadly. The background features a cityscape with a tall building and string lights hanging overhead, creating a warm, festive atmosphere. Soft natural lighting, warm color palette, shallow depth of field, intimate and joyful mood, slightly blurred background, urban rooftop setting, evening ambiance."),
+ generate_paths_from_id('de5a8b250bf407aa7e04913562dcba90', prompt="Close-up photo of a refreshing peach iced tea in a clear plastic cup, centrally positioned on a wooden surface. The drink is garnished with fresh mint leaves and ice cubes, with a yellow and white striped straw angled to the left. Surrounding the cup are whole and sliced peaches, scattered across the table, with their vibrant orange flesh and brown skin visible. The background is softly blurred, featuring bokeh effects from sunlight filtering through green foliage, creating a warm and inviting atmosphere. High contrast, natural lighting, shallow depth of field, vibrant color palette, photorealistic, glossy texture, summer vibe, visually balanced composition."),
+ generate_paths_from_id('2c4e256fa512cb7e7f433f4c7f9101de', prompt="A digital illustration of a small orange tabby kitten sitting in the center of a sunlit meadow, surrounded by white daisies with yellow centers. The kitten has large, expressive eyes and a pink nose, positioned directly facing the viewer. The daisies are scattered around the kitten, with some in the foreground and others in the background, creating a sense of depth. The background is softly blurred, emphasizing the kitten and flowers, with warm, golden sunlight filtering through, casting a gentle glow. Digital art, photorealistic, shallow depth of field, soft natural lighting, warm color palette, high contrast, serene, whimsical, visually balanced, intimate, detailed textures."),
+ generate_paths_from_id('5bf755ed9dbb9b3e223e7ba35232b06e', prompt="A whimsical digital illustration of an astronaut emerging from a cracked eggshell on a barren, moon-like surface. The astronaut is centrally positioned, wearing a white space suit with a reflective visor helmet, holding a small yellow flag with the words 'HELLO WORLD' in black text. The eggshell is partially buried in the textured, rocky terrain, with scattered rocks and dust around it. The background is a blurred, dark blue gradient with circular bokeh effects, suggesting a distant, starry space environment. Soft, warm lighting from the top right creates a gentle glow on the astronaut's suit and the flag, adding a sense of discovery and playfulness. Digital illustration, shallow depth of field, soft focus, warm color palette, whimsical, surreal, high contrast, glossy textures, imaginative, visually balanced."),
+ generate_paths_from_id('9c565b1aad76b22f5bb836744a93561a', prompt="Majestic photo of a golden eagle perched on a rocky outcrop in a mountainous landscape. The eagle is positioned in the right foreground, facing left, with its sharp beak and keen eyes prominently visible. Its plumage is a mix of dark brown and golden hues, with intricate feather details. The background features a soft-focus view of snow-capped mountains under a cloudy sky, creating a serene and grandiose atmosphere. The foreground includes rugged rocks and patches of green moss. Photorealistic, medium depth of field, soft natural lighting, cool color palette, high contrast, sharp focus on the eagle, blurred background, tranquil, majestic, wildlife photography."),
+ generate_paths_from_id('9d39f75f1f728e097efeaff39acb4710', prompt="Serene beach scene at sunrise with a clear blue sky and calm ocean waves. The foreground features sandy beach with footprints leading towards the water, and a large, twisted pine tree with lush green foliage arching over the scene from the right. The sun is rising on the horizon, casting a warm glow and long shadows on the sand. In the background, a rocky outcrop covered with greenery is visible to the left. The ocean stretches out to the right, with gentle waves lapping at the shore. Photorealistic, high contrast, vibrant colors, natural lighting, warm color palette, tranquil atmosphere, balanced composition, sharp details, inviting and peaceful."),
+ generate_paths_from_id('012cd3921e1f97d761eeff580f918ff9', prompt="Portrait of a young woman with long dark hair styled in an elegant updo, smiling directly at the camera. She is wearing a white, floral-embroidered strapless dress, positioned slightly to the right of the frame. Her makeup is subtle yet polished, with a focus on her eyes and lips. She wears a pair of dangling, ornate earrings. Surrounding her are vibrant red roses and lush green foliage, creating a natural and romantic backdrop. The lighting is soft and natural, highlighting her features and casting gentle shadows. The image has a shallow depth of field, with the background softly blurred, emphasizing the subject. Photorealistic, warm color palette, high contrast, intimate, serene, visually balanced."),
+ generate_paths_from_id('53b3f413257bee9e499b823b44623b1a', prompt="A stunning photograph of a red fox standing in a snowy landscape, gazing intently at a small, icy stream in the foreground. The fox is positioned slightly to the left, its vibrant orange fur contrasting with the white snow. Surrounding the fox are delicate branches covered in frost, adding texture to the scene. Above, icicles hang from the branches, catching the light and creating a sense of cold. The reflection of the fox is visible in the still water, enhancing the symmetry of the composition. The background is softly blurred, with hints of blue and white suggesting a serene winter environment. High contrast, sharp focus on the fox, soft natural lighting, cool color palette with warm highlights, photorealistic, tranquil, visually balanced, ethereal winter atmosphere."),
+ generate_paths_from_id('78dc6506367d7aa43fe42a898abbfe4a', prompt="Ethereal digital illustration of a winged woman standing beside a majestic lion on a rocky outcrop. The woman, positioned slightly to the left, wears a flowing, cream-colored gown with intricate detailing and a red sash at the waist. Her long, dark hair cascades down her back, and she holds a golden, ornate vessel in her right hand. The lion stands to her right, its mane richly textured and its gaze directed forward. The background features a vibrant sky with fluffy clouds and a bright sun, casting a warm glow over the scene. The foreground includes delicate orange flowers and tall grasses, adding a touch of nature to the composition. Digital art, high contrast, vivid color palette, soft lighting, surreal and fantastical atmosphere, detailed textures, dynamic composition, harmonious balance, ethereal and majestic mood."),
+ generate_paths_from_id('79f2ee632f1be3ad64210a641c4e201b', prompt="A serene portrait of a young woman with long dark hair, wearing a beige dress with intricate gold embroidery, standing in a softly lit room. She holds a large bouquet of pale pink roses in a black box, positioned in the center of the frame. The background features a tall green plant to the left and a framed artwork on the wall to the right. A window on the left allows natural light to gently illuminate the scene. The woman gazes down at the bouquet with a calm expression. Soft natural lighting, warm color palette, high contrast, photorealistic, intimate, elegant, visually balanced, serene atmosphere."),
+ generate_paths_from_id('88d0ba30e2c0bc4401cf2633cac162d4', prompt="A serene cinematic still of a woman with long, platinum blonde hair sitting on a rocky shore, facing the ocean. She wears a long, dark green dress with intricate detailing on the sleeves. Her expression is joyful, looking upwards towards a black bird in mid-flight, positioned in the upper left of the frame. The ocean waves gently crash in the background, creating a soft, rhythmic pattern. The sky is overcast, casting a diffused, cool light over the scene. Cinematic still, medium depth of field, soft natural lighting, muted color palette, ethereal and tranquil atmosphere, visually balanced composition, gentle contrast, serene and contemplative mood."),
+ generate_paths_from_id('93bc1c43af2d6c91ac2fc966bf7725a2', prompt="Illustration of a young woman with long, wavy red hair sitting at a round wooden table in a sunlit café. She is positioned slightly to the right, holding a white cup in her right hand, looking directly at the viewer with a gentle smile. She wears a white long-sleeve top and blue jeans. On the table, there is a croissant, a bowl of jam, a cup of coffee, and an open magazine. The background features large windows with a view of a street lined with trees and parked cars, blurred to suggest motion. Potted plants are visible outside and inside the café. Warm, natural lighting, soft shadows, vibrant color palette, photorealistic textures, cozy and inviting atmosphere, digital illustration, high contrast, serene and relaxed mood."),
+ generate_paths_from_id('10d7dcae5240b8cc8c9427e876b4f462', prompt="A stylish winter portrait of a young woman in a snowy landscape, wearing a brown fur coat, black turtleneck, and brown leather pants. She is positioned slightly to the right, looking down at her smartphone with a focused expression. A wide-brimmed brown cowboy hat sits atop her head. To her left, a Siberian Husky with striking blue eyes stands attentively, its fur a mix of black, white, and grey. The background features a blurred, desolate winter scene with bare trees and dry grasses, creating a serene and isolated atmosphere. The foreground includes snow-covered ground and sparse, dried plants. Photorealistic, medium depth of field, soft natural lighting, muted color palette, high contrast, fashion photography, sharp focus on the subject, tranquil, elegant, visually balanced."),
+ generate_paths_from_id('0fdaecdb7906a1bf0d6e202363f15de3', prompt="A whimsical digital illustration of a retro-futuristic robot standing in a cozy, softly lit room. The robot, positioned centrally, has a metallic, spherical head with glowing red eyes and large headphones. It wears a brown leather vest and shorts, with articulated black arms and legs, and holds a vintage cassette tape in its right hand. The robot's feet are clad in brown and black boots. In the background, a blurred window with white frames is visible on the left, and a neon sign reading \"biogarty\" glows red on the right wall. Potted plants are placed on the floor and on a table in the background, adding a touch of greenery. The floor is wooden, and a laptop is partially visible in the left foreground. The scene is bathed in warm, natural light with a soft focus, creating a nostalgic and playful atmosphere. Digital illustration, medium depth of field, soft natural lighting, warm color palette, retro-futuristic, whimsical, visually balanced, glossy textures, cozy interior setting."),
+]
+
+
+dense_prediction = [
+ dict(
+ name='Image to Depth',
+ image_type=["target", "depth"]),
+ dict(
+ name='Image to Canny',
+ image_type=["target", "canny"]),
+ dict(
+ name='Image to Hed',
+ image_type=["target", "hed"]),
+ dict(
+ name='Image to Normal',
+ image_type=["target", "normal"]),
+ dict(
+ name='Image to Pose',
+ image_type=["target", "openpose"]),
+]
+dense_prediction_text = [[x['name']] for x in dense_prediction]
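+# One-element rows: the `samples` format expected by the Gradio dataset
+# component that (presumably, in app.py) renders these task buttons.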
+
+conditional_generation = [
+ dict(
+ name='Depth to Image',
+ image_type=["depth", "target"]),
+ dict(
+ name='Foreground to Image',
+ image_type=["foreground", "target"]),
+ dict(
+ name='Background to Image',
+ image_type=["background", "target"]),
+ dict(
+ name='Canny to Image',
+ image_type=["canny", "target"]),
+ dict(
+ name='Hed to Image',
+ image_type=["hed", "target"]),
+ dict(
+ name='Normal to Image',
+ image_type=["normal", "target"]),
+ dict(
+ name='Pose to Image',
+ image_type=["openpose", "target"]),
+ dict(
+ name='Mask to Image',
+ image_type=["mask", "target"]),
+ dict(
+ name='SAM2 to Image',
+ image_type=["sam2_mask", "target"]),
+]
+conditional_generation_text = [[x['name']] for x in conditional_generation]
+
+
+def process_dense_prediction_tasks(x):
+    outputs = None  # stays None if no task name matches x[0]
+    for task in dense_prediction:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+            valid_data = [d for d in dense_prediction_data if all(d.get(t) is not None and os.path.exists(d[t]) for t in image_type)]
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = ""
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
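+            # Per-column generation mask, inferred from how it is built here:
+            # 0 = image is provided as a condition, 1 = image to generate,
+            # so by default only the last column of a row is synthesized.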
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = 0.7
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
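+
+# Note: all process_* helpers in demo_tasks return the same layout, which
+# presumably matches the Gradio components wired up in app.py:
+# [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt,
+#  upsampling_noise, steps, *example images].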
+
+
+def process_conditional_generation_tasks(x):
+    outputs = None  # stays None if no task name matches x[0]
+    for task in conditional_generation:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+            valid_data = [d for d in dense_prediction_data if all(d.get(t) is not None and os.path.exists(d[t]) for t in image_type)]
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
+ if t == "foreground":
+ mask = Image.open(image[t])
+ target_image = Image.open(image['target']).convert('RGB')
+ mask_np = np.array(mask).astype(np.float32) / 255.0
+ result = Image.fromarray((np.array(target_image) * mask_np).astype(np.uint8))
+ rets.append(result)
+ elif t == "background":
+ mask = Image.open(image[t])
+ target_image = Image.open(image['target']).convert('RGB')
+ mask_np = np.array(mask).astype(np.float32) / 255.0
+ mask_np = 1 - mask_np
+ result = Image.fromarray((np.array(target_image) * mask_np).astype(np.uint8))
+ rets.append(result)
+ else:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = get_content_instruction() + images[-1]['prompt']
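+            # Assumption: the caption of the last sampled example (the final,
+            # query row) is appended so the content prompt describes the
+            # target image that must be regenerated.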
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ if 'Pose to Image' == task['name']:
+ upsampling_noise = 0.3
+ else:
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
\ No newline at end of file
diff --git a/demo_tasks/gradio_tasks_editing.py b/demo_tasks/gradio_tasks_editing.py
new file mode 100644
index 0000000000000000000000000000000000000000..9902eaf214a8dbcde8960b59590d94e657f8ac48
--- /dev/null
+++ b/demo_tasks/gradio_tasks_editing.py
@@ -0,0 +1,61 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+
+
+task_instruction = "In each row, a logical task is demonstrated to achieve [IMAGE2] a high-aesthetic image based on [IMAGE1] an aesthetically pleasing photograph. Each row shows a process to edit the image with the given editing instruction."
+editing_instruction = "The editing instruction in the last row is: "
+editing = [
+ dict(
+ name='add',
+ images=[
+ os.path.join('demo_tasks/examples/omniedit/task_obj_add_273266.jpg'),
+ os.path.join('demo_tasks/examples/omniedit/task_obj_add_273266_edit.jpg'),
+ os.path.join('demo_tasks/examples/omniedit/task_obj_add_528329.jpg'),
+ os.path.join('demo_tasks/examples/omniedit/task_obj_add_528329_edit.jpg'),
+ ],
+ grid_h=2,
+ grid_w=2,
+        task_prompt=task_instruction + " " + editing_instruction + " Add a large hawk perched on a branch in the foreground. <\\editing instruction>",
+ content_prompt="",
+ ),
+ dict(
+ name='remove',
+ images=[
+ os.path.join('demo_tasks/examples/omniedit/task_obj_add_528329_edit.jpg'),
+ os.path.join('demo_tasks/examples/omniedit/task_obj_add_528329.jpg'),
+ os.path.join('demo_tasks/examples/omniedit/task_obj_remove_855511_edit.jpg'),
+ os.path.join('demo_tasks/examples/omniedit/task_obj_remove_855511.jpg'),
+ ],
+ grid_h=2,
+ grid_w=2,
+        task_prompt=task_instruction + " " + editing_instruction + " Remove a small, orange and white monkey with black face sitting on a branch in the tree. <\\editing instruction>",
+ content_prompt="",
+ ),
+]
+editing_text = [[x['name']] for x in editing]
+
+
+def process_editing_tasks(x):
+    outputs = None  # stays None if no task name matches x[0]
+    for task in editing:
+ if task['name'] == x[0]:
+ task_prompt = task['task_prompt']
+ content_prompt = task['content_prompt']
+
+ images = task['images']
+ rets = []
+ for image in images:
+ rets.append(Image.open(image))
+
+ grid_h = task['grid_h']
+ grid_w = task['grid_w']
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
diff --git a/demo_tasks/gradio_tasks_editing_subject.py b/demo_tasks/gradio_tasks_editing_subject.py
new file mode 100644
index 0000000000000000000000000000000000000000..54976f5082a1323be473ea5255d57731d8909562
--- /dev/null
+++ b/demo_tasks/gradio_tasks_editing_subject.py
@@ -0,0 +1,69 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+
+
+task_instruction = "Every row demonstrates how to transform [IMAGE1] a reference image showcasing the dominant object, [IMAGE2] a high-quality image into [IMAGE3] a high-quality image through a logical approach."
+content_instruction = "The last image of the final row displays: "
+editing_with_subject = [
+ dict(
+ name='Editing with Subject',
+ examples=[
+ dict(
+ images=[
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_reference.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_qwen_subject_replacement_1737373818845_1.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_target.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_reference.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_qwen_subject_replacement_1737377830929_2.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-5419/data-00004-of-00022-5419_target.jpg'),
+ ],
+ grid_h=2,
+ grid_w=3,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "A sacred, serene marble religious sculpture. Perched on a rocky cliff overlooking the ocean, this item appears ethereal as the first light of dawn paints it in soft pink and gold hues, with waves crashing in the background.",
+ ),
+ dict(
+ images=[
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_reference.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_qwen_subject_replacement_1737373818845_1.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00004-of-00022-3633/data-00004-of-00022-3633_target.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_reference.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_qwen_subject_replacement_1737410088010_2.jpg'),
+ os.path.join('demo_tasks/examples/graph200k/editing/data-00012-of-00022-8475/data-00012-of-00022-8475_target.jpg'),
+ ],
+ grid_h=2,
+ grid_w=3,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "A crisp, golden lager in a glass. Nestled beside a flickering fireplace, it casts a cozy, amber glow on the rustic wooden floor of a mountain cabin, inviting sips after a day in the snow.",
+ )
+ ]
+ ),
+]
+editing_with_subject_text = [[x['name']] for x in editing_with_subject]
+
+
+def process_editing_with_subject_tasks(x):
+    outputs = None  # stays None if no task name matches x[0]
+    for task in editing_with_subject:
+ if task['name'] == x[0]:
+ example = random.choice(task['examples'])
+ task_prompt = example['task_prompt']
+ content_prompt = example['content_prompt']
+
+ images = example['images']
+ rets = []
+ for image in images:
+ rets.append(Image.open(image))
+
+ grid_h = example['grid_h']
+ grid_w = example['grid_w']
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
diff --git a/demo_tasks/gradio_tasks_photodoodle.py b/demo_tasks/gradio_tasks_photodoodle.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b486e20597da75956064823f8cbc3b191a5efc6
--- /dev/null
+++ b/demo_tasks/gradio_tasks_photodoodle.py
@@ -0,0 +1,93 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+
+
+task_instruction ="Every row demonstrates how to transform [IMAGE1] an image with flawless clarity into [IMAGE2] an image with artistic doodle embellishments through a logical approach."
+content_instruction = "The photo doodle effect in the last row is: "
+photodoodle = [
+ dict(
+ name='sksmonstercalledlulu',
+ images=[
+ os.path.join('demo_tasks/examples/photodoodle/sksmonstercalledlulu/5.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmonstercalledlulu/5_blend.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmonstercalledlulu/6.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmonstercalledlulu/6_blend.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmonstercalledlulu/9.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmonstercalledlulu/9_blend.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "add a large pink furry monster called 'Lulu' behind the girl, hugging her. The monster should have a large single eye with a dark pupil, round pink ears, small white teeth, and fluffy texture. Position the monster so that its arms wrap gently around the girl from behind, with its head slightly leaning to the left above her. Make sure the monster's body is large and visible, overlapping the floor and partially obscuring the carpet pattern.",
+ ),
+ dict(
+ name='skspaintingeffects',
+ images=[
+ os.path.join('demo_tasks/examples/photodoodle/skspaintingeffects/12.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/skspaintingeffects/12_blend.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/skspaintingeffects/35.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/skspaintingeffects/35_blend.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/skspaintingeffects/37.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/skspaintingeffects/37_blend.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "add a cluster of colorful daisies to the left side of the kitten's face. Use alternating blue and orange petals with pink centers for each flower. Overlay a design of mixed purple and brown wavy shapes around the top and right sides of the image, creating an abstract artistic effect. Keep the rest of the background unchanged.",
+ ),
+ dict(
+ name='sksmagiceffects',
+ images=[
+ os.path.join('demo_tasks/examples/photodoodle/sksmagiceffects/29.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmagiceffects/29_blend.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmagiceffects/50.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmagiceffects/50_blend.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmagiceffects/24.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksmagiceffects/24_blend.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "add a large, yellow crescent moon to the top section of the circular structure. Place five large, yellow stars around the building.",
+ ),
+ dict(
+ name='sksedgeeffect',
+ images=[
+ os.path.join('demo_tasks/examples/photodoodle/sksedgeeffect/34.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksedgeeffect/34_blend.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksedgeeffect/1.jpg'),
+ os.path.join('demo_tasks/examples/photodoodle/sksedgeeffect/1_blend.jpg'),
+ ],
+ grid_h=2,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "add artistic flower shapes around the edge of the elderly woman. Use a purple flower with a black outline on the top right, a pink flower with a red outline on the top left, and a white flower with a black outline on the bottom right. Surround the woman's silhouette with a blue outline, inside a pink outline, creating a layered edge effect.",
+ )
+]
+photodoodle_text = [[x['name']] for x in photodoodle]
+
+
+def process_photodoodle_tasks(x):
+ for task in photodoodle:
+ if task['name'] == x[0]:
+ task_prompt = task['task_prompt']
+ content_prompt = task['content_prompt']
+
+ images = task['images']
+ rets = []
+ for image in images:
+ rets.append(Image.open(image))
+
+ grid_h = task['grid_h']
+ grid_w = task['grid_w']
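+ # Default mask: only the last image of each row is generated; the preceding images are given as context.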
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
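+ # No task-specific override here: None falls back to the default upsampling noise and step count.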
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
diff --git a/demo_tasks/gradio_tasks_relighting.py b/demo_tasks/gradio_tasks_relighting.py
new file mode 100644
index 0000000000000000000000000000000000000000..1858d83dee14d95f6e8860d3d4e3050bcdcca721
--- /dev/null
+++ b/demo_tasks/gradio_tasks_relighting.py
@@ -0,0 +1,239 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+
+
+task_instruction = "Each row shows a process to manipulate the illumination of images and changes the background following the instruction."
+content_instruction = "Beautiful woman, the illumination comes from left side of the image, "
+relighting = [
+ dict(
+ name='sunset over sea',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02daa50ac59bb9eabcbe0d5304af880d941bffc3.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02daa50ac59bb9eabcbe0d5304af880d941bffc3_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db8a5f38464943d496bd3b475c36a3d65e7095.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db8a5f38464943d496bd3b475c36a3d65e7095_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db96d3ce2531dc4d51dda52492b78cf3577c56.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db96d3ce2531dc4d51dda52492b78cf3577c56_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "sunset over sea.",
+ ),
+ dict(
+ name='light and shadow',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db31cb32e74620523955b70807b3e11815451c_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dcd82122ffe344c8d7c289dc770febb5121153.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dcd82122ffe344c8d7c289dc770febb5121153_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "light and shadow.",
+ ),
+ dict(
+ name='sci-fi RGB glowing, cyberpunk',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02db5a81c222483058fecd76d62c5f7246b06ee4.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db5a81c222483058fecd76d62c5f7246b06ee4_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db80670789cc6722f78747cf6ab8c292a898ab.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db80670789cc6722f78747cf6ab8c292a898ab_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dc3e2cf9541a7d7ebff79cbf1fb0d95b4911e8.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dc3e2cf9541a7d7ebff79cbf1fb0d95b4911e8_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "sci-fi RGB glowing, cyberpunk.",
+ ),
+ dict(
+ name='golden time',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02dc6ca122863a582306a4f146b7bccb721a49e0.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dc6ca122863a582306a4f146b7bccb721a49e0_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dc4ebfd90dc80dbc0f4174679ff3828605ec9c.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dc4ebfd90dc80dbc0f4174679ff3828605ec9c_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dca7ccfad757fd596d33563d06b3ab7836d5af.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dca7ccfad757fd596d33563d06b3ab7836d5af_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "golden time.",
+ ),
+ dict(
+ name='shadow from window',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "shadow from window.",
+ ),
+ dict(
+ name='soft studio lighting',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd6a91d0d1d17a9f06e999654b541b555da242.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd6a91d0d1d17a9f06e999654b541b555da242_Left.jpg'),
+ ],
+ grid_h=2,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "soft studio lighting.",
+ ),
+ dict(
+ name='evil, gothic, Yharnam',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02aee2a8df8f6e6f16ca4ec278203543656cecf1.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02aee2a8df8f6e6f16ca4ec278203543656cecf1_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02af9925c86c22b379e4e6d4f2762d66966ee281.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02af9925c86c22b379e4e6d4f2762d66966ee281_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd79a669a4522f1d5631d75c14243f927848b8.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd79a669a4522f1d5631d75c14243f927848b8_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "evil, gothic, Yharnam.",
+ ),
+ dict(
+ name='neon, Wong Kar-wai, warm',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02af99b6765a77a8f2ac87aa42d2f2453dcd590f.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02af99b6765a77a8f2ac87aa42d2f2453dcd590f_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02b02e2916bf2eb3608f5a806dc3b7ecbed3b649.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02b02e2916bf2eb3608f5a806dc3b7ecbed3b649_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "neon, Wong Kar-wai, warm.",
+ ),
+ dict(
+ name='home atmosphere, cozy bedroom illumination',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02db22466eb3bc19d6a10195e1b48fff696c1582.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02db22466eb3bc19d6a10195e1b48fff696c1582_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c3760bf08f00d9e2163248e2864f5e1a70d709.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c3760bf08f00d9e2163248e2864f5e1a70d709_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02af06c41208b31248e94da13166a675c862b003.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02af06c41208b31248e94da13166a675c862b003_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "home atmosphere, cozy bedroom illumination.",
+ ),
+ dict(
+ name='warm atmosphere, at home, bedroom',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02c39e8e82f4be91d24252c8bfbfdef033ec8a32.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c39e8e82f4be91d24252c8bfbfdef033ec8a32_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c5200cac1d0f19256232a09708ac47f6ddfab3.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c5200cac1d0f19256232a09708ac47f6ddfab3_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd6f77ccab6d63e7f2d7795f5d03180b46621c_Left_2.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "warm atmosphere, at home, bedroom.",
+ ),
+ dict(
+ name='natural lighting',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02dafead46f6d59172d8df216c1f5ad11f9899b5.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dafead46f6d59172d8df216c1f5ad11f9899b5_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dc42496c4ffdb2a8e101ed82943b26fc2d9d24.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dc42496c4ffdb2a8e101ed82943b26fc2d9d24_Left.jpg'),
+ ],
+ grid_h=2,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "natural lighting.",
+ ),
+ dict(
+ name='magic lit',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02dd9913f85a62d9c1587b00f610cc753ebad649.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02dd9913f85a62d9c1587b00f610cc753ebad649_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02afbcf084a1e35bda34c26d2271d56b6a1c621e.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02afbcf084a1e35bda34c26d2271d56b6a1c621e_Left.jpg'),
+ ],
+ grid_h=2,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "magic lit.",
+ ),
+ dict(
+ name='sunshine from window',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02c53f12ec3d4a9a16d9b0ca3f7773ad2222100c.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c53f12ec3d4a9a16d9b0ca3f7773ad2222100c_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c6c0f92a672110ff86bd12f4aa0d0083c9cf6b.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c6c0f92a672110ff86bd12f4aa0d0083c9cf6b_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c5cc03d46ce15494caaf3d65a2b2c7e09089f2.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c5cc03d46ce15494caaf3d65a2b2c7e09089f2_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "sunshine from window.",
+ ),
+ dict(
+ name='neon light, city',
+ images=[
+ os.path.join('demo_tasks/examples/relighting/02c7df6c0decd3d542e25089a0af6afe1e070b6a.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c7df6c0decd3d542e25089a0af6afe1e070b6a_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c77b643fbdaec82912634655426553f3d7a537.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c77b643fbdaec82912634655426553f3d7a537_Left.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c73157a981e0ee669ca8125018efbdda1e1483.jpg'),
+ os.path.join('demo_tasks/examples/relighting/02c73157a981e0ee669ca8125018efbdda1e1483_Left.jpg'),
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction + "neon light, city.",
+ ),
+]
+relighting_text = [[x['name']] for x in relighting]
+
+
+def process_relighting_tasks(x):
+ for task in relighting:
+ if task['name'] == x[0]:
+ task_prompt = task['task_prompt']
+ content_prompt = task['content_prompt']
+
+ images = task['images']
+ rets = []
+ for image in images:
+ rets.append(Image.open(image))
+
+ grid_h = task['grid_h']
+ grid_w = task['grid_w']
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
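+ # Relighting overrides the defaults with a higher upsampling noise and more sampling steps.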
+ upsampling_noise = 0.6
+ steps = 50
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
diff --git a/demo_tasks/gradio_tasks_restoration.py b/demo_tasks/gradio_tasks_restoration.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a84ca75934b72516036a00f1d85e16babf92d0f
--- /dev/null
+++ b/demo_tasks/gradio_tasks_restoration.py
@@ -0,0 +1,95 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+from .gradio_tasks import dense_prediction_data
+from degradation_utils import add_degradation
+import numpy as np
+
+
+degradation_list = [
+ # blur
+ "blur",
+ "compression",
+ "SRx2",
+ "SRx4",
+ "pixelate",
+ "Defocus",
+ "GaussianBlur",
+ # sharpen
+ "oversharpen",
+ # noise
+ "GaussianNoise",
+ "PoissonNoise",
+ "SPNoise",
+ # mosaic
+ "mosaic",
+ # contrast
+ "contrast_strengthen",
+ "contrast_weaken",
+ # quantization
+ "quantization",
+ "JPEG",
+ # light
+ "brighten",
+ "darken",
+ "LowLight",
+ # color
+ "saturate_strengthen",
+ "saturate_weaken",
+ "gray",
+ "ColorDistortion",
+ # infilling
+ "Inpainting",
+ # rotate
+ "rotate180",
+ # other
+ "Barrel",
+ "Pincushion",
+ "Elastic",
+ # spatial effect
+ "Rain",
+ "Frost",
+]
+
+
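+# One restoration task per degradation type: the degraded image conditions generation of the clean target.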
+image_restoration = [dict(name=degradation, image_type=[degradation, "target"]) for degradation in degradation_list]
+image_restoration_text = [[x['name']] for x in image_restoration]
+
+
+def process_image_restoration_tasks(x):
+ for task in image_restoration:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+ valid_data = dense_prediction_data
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
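+ # Build each example row on the fly: the degraded input is synthesized from the clean target via add_degradation.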
+ for image in images:
+ for t in image_type:
+ if t == "target":
+ rets.append(Image.open(image["target"]))
+ else:
+ deg_image, _ = add_degradation(np.array(Image.open(image["target"])), deg_type=t)
+ rets.append(deg_image)
+
+ content_prompt = get_content_instruction() + images[-1]['prompt']
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
diff --git a/demo_tasks/gradio_tasks_style.py b/demo_tasks/gradio_tasks_style.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8ef7ba394e59152a1954dc019e9848819cea774
--- /dev/null
+++ b/demo_tasks/gradio_tasks_style.py
@@ -0,0 +1,120 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+from .gradio_tasks import dense_prediction_data
+
+
+style_transfer = [
+ dict(
+ name='Style Transfer',
+ image_type=["target", "style_source", "style_target"]),
+]
+style_transfer_text = [[x['name']] for x in style_transfer]
+
+
+style_condition_fusion = [
+ dict(
+ name='Canny+Style to Image',
+ image_type=["canny", "style_source", "style_target"]),
+ dict(
+ name='Depth+Style to Image',
+ image_type=["depth", "style_source", "style_target"]),
+ dict(
+ name='Hed+Style to Image',
+ image_type=["hed", "style_source", "style_target"]),
+ dict(
+ name='Normal+Style to Image',
+ image_type=["normal", "style_source", "style_target"]),
+ dict(
+ name='Pose+Style to Image',
+ image_type=["openpose", "style_source", "style_target"]),
+ dict(
+ name='SAM2+Style to Image',
+ image_type=["sam2_mask", "style_source", "style_target"]),
+ dict(
+ name='Mask+Style to Image',
+ image_type=["mask", "style_source", "style_target"]),
+]
+style_condition_fusion_text = [[x['name']] for x in style_condition_fusion]
+
+
+def process_style_transfer_tasks(x):
+ for task in style_transfer:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+ valid_data = [x for x in dense_prediction_data if all([(x.get(t, None) is not None and os.path.exists(x[t])) for t in image_type])]
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
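+ # The style reference is resized to the stylized target's resolution so the grid cells line up.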
+ if t == "style_source":
+ target = Image.open(image["style_target"])
+ source = Image.open(image[t])
+ source = source.resize(target.size)
+ rets.append(source)
+ else:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = ""
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
+
+
+def process_style_condition_fusion_tasks(x):
+ for task in style_condition_fusion:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+ valid_data = [x for x in dense_prediction_data if all([(x.get(t, None) is not None and os.path.exists(x[t])) for t in image_type])]
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
+ if t == "style_source":
+ target = Image.open(image["style_target"])
+ source = Image.open(image[t])
+ source = source.resize(target.size)
+ rets.append(source)
+ else:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = ""
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
diff --git a/demo_tasks/gradio_tasks_subject.py b/demo_tasks/gradio_tasks_subject.py
new file mode 100644
index 0000000000000000000000000000000000000000..36f333c65a70b41f4649c0eefea710641d70ff98
--- /dev/null
+++ b/demo_tasks/gradio_tasks_subject.py
@@ -0,0 +1,339 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+from degradation_utils import add_degradation
+import numpy as np
+
+
+degradation_list = [
+ # blur
+ "blur",
+ "compression",
+ "SRx2",
+ "SRx4",
+ "pixelate",
+ "Defocus",
+ "GaussianBlur",
+ # sharpen
+ "oversharpen",
+ # noise
+ "GaussianNoise",
+ "PoissonNoise",
+ "SPNoise",
+ # mosaic
+ "mosaic",
+ # contrast
+ "contrast_strengthen",
+ "contrast_weaken",
+ # quantization
+ "quantization",
+ "JPEG",
+ # light
+ "brighten",
+ "darken",
+ "LowLight",
+ # color
+ "saturate_strengthen",
+ "saturate_weaken",
+ "gray",
+ "ColorDistortion",
+ # infilling
+ "Inpainting",
+ # rotate
+ "rotate180",
+ # other
+ "Barrel",
+ "Pincushion",
+ "Elastic",
+ # spatial effect
+ "Rain",
+ "Frost",
+]
+
+
+def generate_paths_from_id(file_id: str, prompt: str) -> dict:
+ """
+ 根据文件ID自动生成所有相关文件的路径
+
+ Args:
+ file_id: str - 文件的唯一标识符 (例如: '5c79f1ea582c3faa093d2e09b906321d')
+
+ Returns:
+ dict: 包含所有生成路径的字典
+ """
+ base_path = 'demo_tasks/examples/graph200k'
+
+ paths = {
+ 'reference': f'{base_path}/{file_id}/{file_id}_reference.jpg',
+ 'target': f'{base_path}/{file_id}/{file_id}_target.jpg',
+ 'depth': f'{base_path}/{file_id}/{file_id}_depth-anything-v2_Large.jpg',
+ 'canny': f'{base_path}/{file_id}/{file_id}_canny_100_200_512.jpg',
+ 'hed': f'{base_path}/{file_id}/{file_id}_hed_512.jpg',
+ 'normal': f'{base_path}/{file_id}/{file_id}_dsine-normal-map.jpg',
+ 'style_target': f'{base_path}/{file_id}/{file_id}_instantx-style_0.jpg',
+ 'style_source': f'{base_path}/{file_id}/{file_id}_instantx-style_0_style.jpg',
+ 'sam2_mask': f'{base_path}/{file_id}/{file_id}_sam2_mask.jpg',
+ 'prompt': prompt
+ }
+
+ return paths
+
+
+dense_prediction_data = [
+ generate_paths_from_id('data-00004-of-00022-7170', prompt="Travel VPN app on a desktop screen. The interface is visible on a laptop in a modern airport lounge, captured from a side angle with natural daylight highlighting the sleek design, while planes can be seen through the large window behind the device."),
+ generate_paths_from_id('data-00005-of-00022-4396', prompt="A vintage porcelain collector's item. Beneath a blossoming cherry tree in early spring, this treasure is photographed up close, with soft pink petals drifting through the air and vibrant blossoms framing the scene."),
+ generate_paths_from_id('data-00018-of-00022-4948', prompt="Decorative kitchen salt shaker with intricate design. On a quaint countryside porch in the afternoon's gentle breeze, accompanied by pastel-colored flowers and vintage cutlery, it adds a touch of charm to the rustic scene."),
+ generate_paths_from_id('data-00013-of-00022-4696', prompt="A lifelike forest creature figurine. Nestled among drifting autumn leaves on a tree-lined walking path, it gazes out as pedestrians bundled in scarves pass by."),
+ generate_paths_from_id('data-00017-of-00022-8377', prompt="A colorful bike for young adventurers. In a bustling city street during a bright afternoon, it leans against a lamppost, surrounded by hurried pedestrians, with towering buildings providing an urban backdrop."),
+]
+
+
+subject_driven = [
+ dict(
+ name='Subject-driven generation',
+ image_type=["reference", "target"]),
+]
+subject_driven_text = [[x['name']] for x in subject_driven]
+
+
+style_transfer_with_subject = [
+ dict(
+ name='Style Transfer with Subject',
+ image_type=["reference", "style_source", "style_target"]),
+]
+style_transfer_with_subject_text = [[x['name']] for x in style_transfer_with_subject]
+
+
+condition_subject_fusion = [
+ dict(
+ name='Depth+Subject to Image',
+ image_type=["reference", "depth", "target"]),
+ dict(
+ name='Canny+Subject to Image',
+ image_type=["reference", "canny", "target"]),
+ dict(
+ name='Hed+Subject to Image',
+ image_type=["reference", "hed", "target"]),
+ dict(
+ name='Normal+Subject to Image',
+ image_type=["reference", "normal", "target"]),
+ dict(
+ name='SAM2+Subject to Image',
+ image_type=["reference", "sam2_mask", "target"]),
+]
+condition_subject_fusion_text = [[x['name']] for x in condition_subject_fusion]
+
+image_restoration_with_subject = [
+ dict(name=degradation, image_type=["reference", degradation, "target"])
+ for degradation in degradation_list
+]
+image_restoration_with_subject_text = [[x['name']] for x in image_restoration_with_subject]
+
+
+condition_subject_style_fusion = [
+ dict(
+ name='Depth+Subject+Style to Image',
+ image_type=["reference", "depth", "style_source", "style_target"]),
+ dict(
+ name='Canny+Subject+Style to Image',
+ image_type=["reference", "canny", "style_source", "style_target"]),
+ dict(
+ name='Hed+Subject+Style to Image',
+ image_type=["reference", "hed", "style_source", "style_target"]),
+ dict(
+ name='Normal+Subject+Style to Image',
+ image_type=["reference", "normal", "style_source", "style_target"]),
+ dict(
+ name='SAM2+Subject+Style to Image',
+ image_type=["reference", "sam2_mask", "style_source", "style_target"]),
+]
+condition_subject_style_fusion_text = [[x['name']] for x in condition_subject_style_fusion]
+
+
+def process_subject_driven_tasks(x):
+ for task in subject_driven:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+ valid_data = [x for x in dense_prediction_data if all([x.get(t, None) is not None and os.path.exists(x[t]) for t in image_type])]
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = get_content_instruction() + images[-1]['prompt']
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
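+ # Subject-driven tasks raise the upsampling noise but keep the default number of steps.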
+ upsampling_noise = 0.6
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
+
+
+def process_condition_subject_fusion_tasks(x):
+ for task in condition_subject_fusion:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+ valid_data = [x for x in dense_prediction_data if all([x.get(t, None) is not None and os.path.exists(x[t]) for t in image_type])]
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = get_content_instruction() + images[-1]['prompt']
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = 0.6
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
+
+
+def process_style_transfer_with_subject_tasks(x):
+ for task in style_transfer_with_subject:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+ valid_data = [x for x in dense_prediction_data if all([x.get(t, None) is not None and os.path.exists(x[t]) for t in image_type])]
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
+ if t == "style_source":
+ target = Image.open(image["style_target"])
+ source = Image.open(image[t])
+ source = source.resize(target.size)
+ rets.append(source)
+ else:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = ""
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = 0.6
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
+
+
+def process_condition_subject_style_fusion_tasks(x):
+ for task in condition_subject_style_fusion:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+ valid_data = [x for x in dense_prediction_data if all([x.get(t, None) is not None and os.path.exists(x[t]) for t in image_type])]
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
+ if t == "style_source":
+ target = Image.open(image["style_target"])
+ source = Image.open(image[t])
+ source = source.resize(target.size)
+ rets.append(source)
+ else:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = ""
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = 0.6
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
+
+
+def process_image_restoration_with_subject_tasks(x):
+ for task in image_restoration_with_subject:
+ if task['name'] == x[0]:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = ", ".join(image_prompt_list[:-1])
+ target_prompt = image_prompt_list[-1]
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+
+ # sample examples
+ valid_data = dense_prediction_data
+ n_samples = random.randint(2, min(len(valid_data), 3))
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
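+ # Keep the reference image clean; only the target is degraded to form the restoration input.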
+ for image in images:
+ for t in image_type:
+ if t == "target":
+ rets.append(Image.open(image["target"]))
+ elif t == "reference":
+ rets.append(Image.open(image["reference"]))
+ else:
+ deg_image, _ = add_degradation(np.array(Image.open(image["target"])), deg_type=t)
+ rets.append(deg_image)
+
+ content_prompt = get_content_instruction() + images[-1]['prompt']
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = 0.6
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
diff --git a/demo_tasks/gradio_tasks_tryon.py b/demo_tasks/gradio_tasks_tryon.py
new file mode 100644
index 0000000000000000000000000000000000000000..c783a01b2bbe5ba58559d553c33f506245623aa2
--- /dev/null
+++ b/demo_tasks/gradio_tasks_tryon.py
@@ -0,0 +1,50 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+
+
+task_instruction = "Each row shows a virtual try-on process that aims to put [IMAGE2] the clothing onto [IMAGE1] the person, producing [IMAGE3] the person wearing the new clothing."
+content_instruction = ""
+tryon = [
+ dict(
+ name='Virtual Try-On',
+ images=[
+ os.path.join('demo_tasks/examples/tryon/00700_00.jpg'),
+ os.path.join('demo_tasks/examples/tryon/03673_00.jpg'),
+ os.path.join('demo_tasks/examples/tryon/00700_00_tryon_catvton_0.jpg'),
+ os.path.join('demo_tasks/examples/tryon/00555_00.jpg'),
+ os.path.join('demo_tasks/examples/tryon/12265_00.jpg'),
+ os.path.join('demo_tasks/examples/tryon/00555_00_tryon_catvton_0.jpg'),
+ ],
+ grid_h=2,
+ grid_w=3,
+ task_prompt=task_instruction,
+ content_prompt=content_instruction,
+ ),
+]
+tryon_text = [[x['name']] for x in tryon]
+
+
+def process_tryon_tasks(x):
+ for task in tryon:
+ if task['name'] == x[0]:
+ task_prompt = task['task_prompt']
+ content_prompt = task['content_prompt']
+
+ images = task['images']
+ rets = []
+ for image in images:
+ rets.append(Image.open(image))
+
+ grid_h = task['grid_h']
+ grid_w = task['grid_w']
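+ # Mask only the last image in the row, i.e. the try-on result to be generated.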
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
diff --git a/demo_tasks/gradio_tasks_unseen.py b/demo_tasks/gradio_tasks_unseen.py
new file mode 100644
index 0000000000000000000000000000000000000000..6dc7d27326722dbffb16f8666b88d11cc9784d3e
--- /dev/null
+++ b/demo_tasks/gradio_tasks_unseen.py
@@ -0,0 +1,276 @@
+import os
+from data.prefix_instruction import get_image_prompt, get_task_instruction, get_layout_instruction, get_content_instruction
+import random
+from PIL import Image
+
+
+def generate_paths_from_id(file_id: str, prompt: str) -> dict:
+ """
+ 根据文件ID自动生成所有相关文件的路径
+
+ Args:
+ file_id: str - 文件的唯一标识符 (例如: '5c79f1ea582c3faa093d2e09b906321d')
+
+ Returns:
+ dict: 包含所有生成路径的字典
+ """
+ base_path = 'demo_tasks/examples'
+
+ paths = {
+ 'target': f'{base_path}/{file_id}/{file_id}.jpg',
+ 'depth': f'{base_path}/{file_id}/{file_id}_depth-anything-v2_Large.jpg',
+ 'canny': f'{base_path}/{file_id}/{file_id}_canny_100_200_512.jpg',
+ 'hed': f'{base_path}/{file_id}/{file_id}_hed_512.jpg',
+ 'normal': f'{base_path}/{file_id}/{file_id}_dsine_normal_map.jpg',
+ 'openpose': f'{base_path}/{file_id}/{file_id}_openpose_fullres_nohand.jpg',
+ 'style_target': f'{base_path}/{file_id}/{file_id}_instantx-style_0.jpg',
+ 'style_source': f'{base_path}/{file_id}/{file_id}_instantx-style_0_style.jpg',
+ 'foreground': f'{base_path}/{file_id}/{file_id}_ben2-background-removal.jpg',
+ 'background': f'{base_path}/{file_id}/{file_id}_ben2-background-removal.jpg',
+ 'mask': f'{base_path}/{file_id}/{file_id}_qwen2_5_mask.jpg',
+ 'sam2_mask': f'{base_path}/{file_id}/{file_id}_sam2_mask.jpg',
+ 'prompt': prompt
+ }
+
+ return paths
+
+
+dense_prediction_data = [
+ generate_paths_from_id('2b74476568f7562a6aa832d423132ed3', prompt="Group photo of five young adults enjoying a rooftop gathering at dusk. The group is positioned in the center, with three women and two men smiling and embracing. The woman on the far left wears a floral top and holds a drink, looking slightly to the right. Next to her, a woman in a denim jacket stands close to a woman in a white blouse, both smiling directly at the camera. The fourth woman, in an orange top, stands close to the man on the far right, who wears a red shirt and blue blazer, smiling broadly. The background features a cityscape with a tall building and string lights hanging overhead, creating a warm, festive atmosphere. Soft natural lighting, warm color palette, shallow depth of field, intimate and joyful mood, slightly blurred background, urban rooftop setting, evening ambiance."),
+ generate_paths_from_id('de5a8b250bf407aa7e04913562dcba90', prompt="Close-up photo of a refreshing peach iced tea in a clear plastic cup, centrally positioned on a wooden surface. The drink is garnished with fresh mint leaves and ice cubes, with a yellow and white striped straw angled to the left. Surrounding the cup are whole and sliced peaches, scattered across the table, with their vibrant orange flesh and brown skin visible. The background is softly blurred, featuring bokeh effects from sunlight filtering through green foliage, creating a warm and inviting atmosphere. High contrast, natural lighting, shallow depth of field, vibrant color palette, photorealistic, glossy texture, summer vibe, visually balanced composition."),
+ generate_paths_from_id('2c4e256fa512cb7e7f433f4c7f9101de', prompt="A digital illustration of a small orange tabby kitten sitting in the center of a sunlit meadow, surrounded by white daisies with yellow centers. The kitten has large, expressive eyes and a pink nose, positioned directly facing the viewer. The daisies are scattered around the kitten, with some in the foreground and others in the background, creating a sense of depth. The background is softly blurred, emphasizing the kitten and flowers, with warm, golden sunlight filtering through, casting a gentle glow. Digital art, photorealistic, shallow depth of field, soft natural lighting, warm color palette, high contrast, serene, whimsical, visually balanced, intimate, detailed textures."),
+ generate_paths_from_id('5bf755ed9dbb9b3e223e7ba35232b06e', prompt="A whimsical digital illustration of an astronaut emerging from a cracked eggshell on a barren, moon-like surface. The astronaut is centrally positioned, wearing a white space suit with a reflective visor helmet, holding a small yellow flag with the words 'HELLO WORLD' in black text. The eggshell is partially buried in the textured, rocky terrain, with scattered rocks and dust around it. The background is a blurred, dark blue gradient with circular bokeh effects, suggesting a distant, starry space environment. Soft, warm lighting from the top right creates a gentle glow on the astronaut's suit and the flag, adding a sense of discovery and playfulness. Digital illustration, shallow depth of field, soft focus, warm color palette, whimsical, surreal, high contrast, glossy textures, imaginative, visually balanced."),
+ generate_paths_from_id('9c565b1aad76b22f5bb836744a93561a', prompt="Majestic photo of a golden eagle perched on a rocky outcrop in a mountainous landscape. The eagle is positioned in the right foreground, facing left, with its sharp beak and keen eyes prominently visible. Its plumage is a mix of dark brown and golden hues, with intricate feather details. The background features a soft-focus view of snow-capped mountains under a cloudy sky, creating a serene and grandiose atmosphere. The foreground includes rugged rocks and patches of green moss. Photorealistic, medium depth of field, soft natural lighting, cool color palette, high contrast, sharp focus on the eagle, blurred background, tranquil, majestic, wildlife photography."),
+ generate_paths_from_id('9d39f75f1f728e097efeaff39acb4710', prompt="Serene beach scene at sunrise with a clear blue sky and calm ocean waves. The foreground features sandy beach with footprints leading towards the water, and a large, twisted pine tree with lush green foliage arching over the scene from the right. The sun is rising on the horizon, casting a warm glow and long shadows on the sand. In the background, a rocky outcrop covered with greenery is visible to the left. The ocean stretches out to the right, with gentle waves lapping at the shore. Photorealistic, high contrast, vibrant colors, natural lighting, warm color palette, tranquil atmosphere, balanced composition, sharp details, inviting and peaceful."),
+ generate_paths_from_id('012cd3921e1f97d761eeff580f918ff9', prompt="Portrait of a young woman with long dark hair styled in an elegant updo, smiling directly at the camera. She is wearing a white, floral-embroidered strapless dress, positioned slightly to the right of the frame. Her makeup is subtle yet polished, with a focus on her eyes and lips. She wears a pair of dangling, ornate earrings. Surrounding her are vibrant red roses and lush green foliage, creating a natural and romantic backdrop. The lighting is soft and natural, highlighting her features and casting gentle shadows. The image has a shallow depth of field, with the background softly blurred, emphasizing the subject. Photorealistic, warm color palette, high contrast, intimate, serene, visually balanced."),
+ generate_paths_from_id('53b3f413257bee9e499b823b44623b1a', prompt="A stunning photograph of a red fox standing in a snowy landscape, gazing intently at a small, icy stream in the foreground. The fox is positioned slightly to the left, its vibrant orange fur contrasting with the white snow. Surrounding the fox are delicate branches covered in frost, adding texture to the scene. Above, icicles hang from the branches, catching the light and creating a sense of cold. The reflection of the fox is visible in the still water, enhancing the symmetry of the composition. The background is softly blurred, with hints of blue and white suggesting a serene winter environment. High contrast, sharp focus on the fox, soft natural lighting, cool color palette with warm highlights, photorealistic, tranquil, visually balanced, ethereal winter atmosphere."),
+ generate_paths_from_id('78dc6506367d7aa43fe42a898abbfe4a', prompt="Ethereal digital illustration of a winged woman standing beside a majestic lion on a rocky outcrop. The woman, positioned slightly to the left, wears a flowing, cream-colored gown with intricate detailing and a red sash at the waist. Her long, dark hair cascades down her back, and she holds a golden, ornate vessel in her right hand. The lion stands to her right, its mane richly textured and its gaze directed forward. The background features a vibrant sky with fluffy clouds and a bright sun, casting a warm glow over the scene. The foreground includes delicate orange flowers and tall grasses, adding a touch of nature to the composition. Digital art, high contrast, vivid color palette, soft lighting, surreal and fantastical atmosphere, detailed textures, dynamic composition, harmonious balance, ethereal and majestic mood."),
+ generate_paths_from_id('79f2ee632f1be3ad64210a641c4e201b', prompt="A serene portrait of a young woman with long dark hair, wearing a beige dress with intricate gold embroidery, standing in a softly lit room. She holds a large bouquet of pale pink roses in a black box, positioned in the center of the frame. The background features a tall green plant to the left and a framed artwork on the wall to the right. A window on the left allows natural light to gently illuminate the scene. The woman gazes down at the bouquet with a calm expression. Soft natural lighting, warm color palette, high contrast, photorealistic, intimate, elegant, visually balanced, serene atmosphere."),
+ generate_paths_from_id('88d0ba30e2c0bc4401cf2633cac162d4', prompt="A serene cinematic still of a woman with long, platinum blonde hair sitting on a rocky shore, facing the ocean. She wears a long, dark green dress with intricate detailing on the sleeves. Her expression is joyful, looking upwards towards a black bird in mid-flight, positioned in the upper left of the frame. The ocean waves gently crash in the background, creating a soft, rhythmic pattern. The sky is overcast, casting a diffused, cool light over the scene. Cinematic still, medium depth of field, soft natural lighting, muted color palette, ethereal and tranquil atmosphere, visually balanced composition, gentle contrast, serene and contemplative mood."),
+ generate_paths_from_id('93bc1c43af2d6c91ac2fc966bf7725a2', prompt="Illustration of a young woman with long, wavy red hair sitting at a round wooden table in a sunlit café. She is positioned slightly to the right, holding a white cup in her right hand, looking directly at the viewer with a gentle smile. She wears a white long-sleeve top and blue jeans. On the table, there is a croissant, a bowl of jam, a cup of coffee, and an open magazine. The background features large windows with a view of a street lined with trees and parked cars, blurred to suggest motion. Potted plants are visible outside and inside the café. Warm, natural lighting, soft shadows, vibrant color palette, photorealistic textures, cozy and inviting atmosphere, digital illustration, high contrast, serene and relaxed mood."),
+ generate_paths_from_id('10d7dcae5240b8cc8c9427e876b4f462', prompt="A stylish winter portrait of a young woman in a snowy landscape, wearing a brown fur coat, black turtleneck, and brown leather pants. She is positioned slightly to the right, looking down at her smartphone with a focused expression. A wide-brimmed brown cowboy hat sits atop her head. To her left, a Siberian Husky with striking blue eyes stands attentively, its fur a mix of black, white, and grey. The background features a blurred, desolate winter scene with bare trees and dry grasses, creating a serene and isolated atmosphere. The foreground includes snow-covered ground and sparse, dried plants. Photorealistic, medium depth of field, soft natural lighting, muted color palette, high contrast, fashion photography, sharp focus on the subject, tranquil, elegant, visually balanced."),
+ generate_paths_from_id('0fdaecdb7906a1bf0d6e202363f15de3', prompt="A whimsical digital illustration of a retro-futuristic robot standing in a cozy, softly lit room. The robot, positioned centrally, has a metallic, spherical head with glowing red eyes and large headphones. It wears a brown leather vest and shorts, with articulated black arms and legs, and holds a vintage cassette tape in its right hand. The robot's feet are clad in brown and black boots. In the background, a blurred window with white frames is visible on the left, and a neon sign reading \"biogarty\" glows red on the right wall. Potted plants are placed on the floor and on a table in the background, adding a touch of greenery. The floor is wooden, and a laptop is partially visible in the left foreground. The scene is bathed in warm, natural light with a soft focus, creating a nostalgic and playful atmosphere. Digital illustration, medium depth of field, soft natural lighting, warm color palette, retro-futuristic, whimsical, visually balanced, glossy textures, cozy interior setting."),
+]
+
+
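+# Tasks not seen during training; an entry either hard-codes its example images or derives them from image_type.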
+unseen_tasks = [
+ dict(
+ name='Frontal Face Reconstruction',
+ images=[
+ 'demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_0.jpg',
+ 'demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_2.jpg',
+ 'demo_tasks/examples/face/34e1633a-369f-4324-86c3-3e6418ec00be/face_1.jpg',
+ 'demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_0.jpg',
+ 'demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_2.jpg',
+ 'demo_tasks/examples/face/cb5d403a-f1bb-4392-8302-24846893a797/face_1.jpg',
+ 'demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_8.jpg',
+ 'demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_6.jpg',
+ 'demo_tasks/examples/face/2ef6aa5a-e751-4bf2-a302-0237ab460627/face_1.jpg',
+ ],
+ grid_h=3,
+ grid_w=3,
+ task_prompt="Each row presents multi-view of a face, given a frontal face reconstruction task that leverages [IMAGE1] a left side of the face and [IMAGE2] a right side of the face, to generate [IMAGE3] a frontal face that faces the center of the lens.",
+ content_prompt="The content of the last image in the final row is: the woman's frontal face that faces the center of the lens.",
+ ),
+ dict(
+ name='Image to Depth + Normal + Hed',
+ image_type=["target", "depth", "normal", "hed"],
+ mask=[0, 1, 1, 1],
+ ),
+ dict(
+ name='Depth to Image + Relighting',
+ examples=[
+ dict(
+ images=[
+ 'demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_depth.jpg',
+ 'demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6.jpg',
+ 'demo_tasks/examples/relighting/02dad6943d2033198a89c1d5f222db2eacb293c6_Left.jpg',
+ 'demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_depth.jpg',
+ 'demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010.jpg',
+ 'demo_tasks/examples/relighting/02af9fa52ff41e64de8e3212683c9ed43bd91010_Left.jpg',
+ ],
+ grid_h=2,
+ grid_w=3,
+ task_prompt="Each row outlines a logical process, starting from [IMAGE1] depth map highlighting gray-level depth variations, to achieve [IMAGE2] an image with flawless clarity and [IMAGE3] the image with manipulated illumination and changed background.",
+ content_prompt="In the last row, the illumination comes from left side of the image, the light effects are " + "light and shadow.",
+ mask=[0, 1, 1],
+ ),
+ dict(
+ images=[
+ 'demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_depth.jpg',
+ 'demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657.jpg',
+ 'demo_tasks/examples/relighting/02dd1c7c81e77e22ddba378a121fc371afcc9657_Left.jpg',
+ #
+ 'demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_depth.jpg',
+ 'demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8.jpg',
+ 'demo_tasks/examples/relighting/02dcc762ae13127e3975ec043f13342490f61cf8_Left.jpg',
+ #
+ 'demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_depth.jpg',
+ 'demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4.jpg',
+ 'demo_tasks/examples/relighting/02dd0f49dceaf611e0173319e26b4e6e1b7a6dd4_Left.jpg',
+ ],
+ grid_h=3,
+ grid_w=3,
+ task_prompt="Each row outlines a logical process, starting from [IMAGE1] depth map highlighting gray-level depth variations, to achieve [IMAGE2] an image with flawless clarity and [IMAGE3] the image with manipulated illumination and changed background.",
+ content_prompt="In the last row, the illumination comes from left side of the image, the light effects are " + "shadow from window.",
+ mask=[0, 1, 1],
+ )
+ ],
+ ),
+ dict(
+ name='Pose + Edge to Image',
+ examples=[
+ dict(
+ images=[
+ 'demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_openpose_fullres_nohand.jpg',
+ 'demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3_hed_512.jpg',
+ 'demo_tasks/examples/2b74476568f7562a6aa832d423132ed3/2b74476568f7562a6aa832d423132ed3.jpg',
+ 'demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_openpose_fullres_nohand.jpg',
+ 'demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a_edge.jpg',
+ 'demo_tasks/examples/78dc6506367d7aa43fe42a898abbfe4a/78dc6506367d7aa43fe42a898abbfe4a.jpg',
+ ],
+ grid_h=2,
+ grid_w=3,
+ task_prompt="Every row demonstrates how to transform [IMAGE1] human pose with colored lines for bone structure and [IMAGE2] canny map with sharp white edges and dark into [IMAGE3] a visually striking and clear picture through a logical approach.",
+ content_prompt="The content of the last image in the concluding row is: Ethereal digital illustration of a winged woman standing beside a majestic lion on a rocky outcrop. The woman, positioned slightly to the left, wears a flowing, cream-colored gown with intricate detailing and a red sash at the waist. Her long, dark hair cascades down her back, and she holds a golden, ornate vessel in her right hand. The lion stands to her right, its mane richly textured and its gaze directed forward. The background features a vibrant sky with fluffy clouds and a bright sun, casting a warm glow over the scene. The foreground includes delicate orange flowers and tall grasses, adding a touch of nature to the composition. Digital art, high contrast, vivid color palette, soft lighting, surreal and fantastical atmosphere, detailed textures, dynamic composition, harmonious balance, ethereal and majestic mood.",
+ mask=[0, 1],
+ )
+ ]
+ ),
+ dict(
+ name='Attribute Transformation',
+ examples=[
+ dict(
+ images=[
+ 'demo_tasks/examples/property/1_source.jpg',
+ 'demo_tasks/examples/property/1_target.jpg',
+ 'demo_tasks/examples/property/2_source.jpg',
+ 'demo_tasks/examples/property/2_target.jpg',
+ 'demo_tasks/examples/property/3_source.jpg',
+ 'demo_tasks/examples/property/3_target.jpg',
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt="In each row, a logical task is demonstrated to achieve [IMAGE2] a high-aesthetic image based on [IMAGE1] an aesthetically pleasing photograph. Each row shows a process to edit the image with the given editing instruction. The editing instruction in the last row is: turn the color of the sunglasses to green. <\editing instruction>",
+ content_prompt="",
+ mask=[0, 1],
+ )
+ ]
+ ),
+ dict(
+ name='Environment Modification',
+ examples=[
+ dict(
+ images=[
+ 'demo_tasks/examples/env/1_source.jpg',
+ 'demo_tasks/examples/env/1_target.jpg',
+ 'demo_tasks/examples/env/2_source.jpg',
+ 'demo_tasks/examples/env/2_target.jpg',
+ 'demo_tasks/examples/env/3_source.jpg',
+ 'demo_tasks/examples/env/3_target.jpg',
+ ],
+ grid_h=3,
+ grid_w=2,
+ task_prompt="In each row, a logical task is demonstrated to achieve [IMAGE2] a high-aesthetic image based on [IMAGE1] an aesthetically pleasing photograph. Each row shows a process to edit the image with the given editing instruction. The editing instruction in the last row is: change the weather to a snowy scene in winter. <\editing instruction>",
+ content_prompt="",
+ mask=[0, 1],
+ )
+ ]
+ )
+]
+unseen_tasks_text = [[x['name']] for x in unseen_tasks]
+
+
+def process_unseen_tasks(x):
+ outputs = None  # remains None when x[0] matches no task name
+ for task in unseen_tasks:
+ if 'Image to Depth + Normal + Hed' == x[0] == task['name']:
+ image_type = task['image_type']
+ image_prompt_list = [get_image_prompt(x)[0] for x in image_type]
+ image_prompt_list = [f"[IMAGE{idx+1}] {image_prompt}" for idx, image_prompt in enumerate(image_prompt_list)]
+ condition_prompt = image_prompt_list[0]
+ target_prompt = ", ".join(image_prompt_list[1:])
+ task_prompt = get_task_instruction(condition_prompt, target_prompt)
+ # sample examples
+ valid_data = [x for x in dense_prediction_data if all([x.get(t, None) is not None and os.path.exists(x[t]) for t in image_type])]
+ n_samples = min(len(valid_data), 3)  # cap at 3 example rows; avoids randint(3, n) errors when n < 3
+ images = random.sample(valid_data, k=n_samples)
+ rets = []
+ for image in images:
+ for t in image_type:
+ rets.append(Image.open(image[t]))
+
+ content_prompt = ""
+
+ grid_h = n_samples
+ grid_w = len(image_type)
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+ elif x[0] == task['name']:
+ task = random.choice(task['examples'])
+ task_prompt = task['task_prompt']
+ content_prompt = task['content_prompt']
+
+ images = task['images']
+ rets = []
+ for image in images:
+ rets.append(Image.open(image))
+
+ grid_h = task['grid_h']
+ grid_w = task['grid_w']
+ mask = task.get('mask', [0 for _ in range(grid_w - 1)] + [1])
+ layout_prompt = get_layout_instruction(grid_w, grid_h)
+
+ upsampling_noise = None
+ steps = None
+ outputs = [mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, upsampling_noise, steps] + rets
+ break
+
+ return outputs
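+
+
+# Minimal usage sketch (assumption: `x` is the row selected from the Gradio
+# task table, i.e. a one-element list holding the task name):
+#
+#   outputs = process_unseen_tasks(["Frontal Face Reconstruction"])
+#   mask, grid_h, grid_w, layout_prompt, task_prompt, content_prompt, \
+#       upsampling_noise, steps, *images = outputs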
diff --git a/flux/__init__.py b/flux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dddc6a38b801798e6675dea1498e32ffbc8c39ab
--- /dev/null
+++ b/flux/__init__.py
@@ -0,0 +1,13 @@
+try:
+ from ._version import (
+ version as __version__, # type: ignore
+ version_tuple,
+ )
+except ImportError:
+ __version__ = "unknown (no version information available)"
+ version_tuple = (0, 0, "unknown", "noinfo")
+
+from pathlib import Path
+
+PACKAGE = __package__.replace("_", "-")
+PACKAGE_ROOT = Path(__file__).parent
diff --git a/flux/__main__.py b/flux/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5cf0fd2444d4cda4053fa74dad3371556b886e5
--- /dev/null
+++ b/flux/__main__.py
@@ -0,0 +1,4 @@
+from .cli import app
+
+if __name__ == "__main__":
+ app()
diff --git a/flux/api.py b/flux/api.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a60884012e66a944b18683a5d3d100828c5b1b9
--- /dev/null
+++ b/flux/api.py
@@ -0,0 +1,225 @@
+import io
+import os
+import time
+from pathlib import Path
+
+import requests
+from PIL import Image
+
+API_URL = "https://api.bfl.ml"
+API_ENDPOINTS = {
+ "flux.1-pro": "flux-pro",
+ "flux.1-dev": "flux-dev",
+ "flux.1.1-pro": "flux-pro-1.1",
+}
+
+
+class ApiException(Exception):
+ def __init__(self, status_code: int, detail: str | list[dict] | None = None):
+ super().__init__()
+ self.detail = detail
+ self.status_code = status_code
+
+ def __str__(self) -> str:
+ return self.__repr__()
+
+ def __repr__(self) -> str:
+ if self.detail is None:
+ message = None
+ elif isinstance(self.detail, str):
+ message = self.detail
+ else:
+ message = "[" + ",".join(d["msg"] for d in self.detail) + "]"
+ return f"ApiException({self.status_code=}, {message=}, detail={self.detail})"
+
+
+class ImageRequest:
+ def __init__(
+ self,
+ # api inputs
+ prompt: str,
+ name: str = "flux.1.1-pro",
+ width: int | None = None,
+ height: int | None = None,
+ num_steps: int | None = None,
+ prompt_upsampling: bool | None = None,
+ seed: int | None = None,
+ guidance: float | None = None,
+ interval: float | None = None,
+ safety_tolerance: int | None = None,
+ # behavior of this class
+ validate: bool = True,
+ launch: bool = True,
+ api_key: str | None = None,
+ ):
+ """
+ Manages an image generation request to the API.
+
+ All parameters not specified will use the API defaults.
+
+ Args:
+ prompt: Text prompt for image generation.
+ width: Width of the generated image in pixels. Must be a multiple of 32.
+ height: Height of the generated image in pixels. Must be a multiple of 32.
+ name: Which model version to use
+ num_steps: Number of steps for the image generation process.
+ prompt_upsampling: Whether to perform upsampling on the prompt.
+ seed: Optional seed for reproducibility.
+ guidance: Guidance scale for image generation.
+ interval: Interval parameter (flux.1-pro only).
+ safety_tolerance: Tolerance level for input and output moderation.
+ Between 0 and 6, 0 being most strict, 6 being least strict.
+ validate: Run input validation
+ launch: Directly launches request
+ api_key: Your API key if not provided by the environment
+
+ Raises:
+ ValueError: For invalid input, when `validate`
+ ApiException: For errors raised from the API
+ """
+ if validate:
+ if name not in API_ENDPOINTS:
+ raise ValueError(f"Invalid model {name}")
+ elif width is not None and width % 32 != 0:
+ raise ValueError(f"width must be divisible by 32, got {width}")
+ elif width is not None and not (256 <= width <= 1440):
+ raise ValueError(f"width must be between 256 and 1440, got {width}")
+ elif height is not None and height % 32 != 0:
+ raise ValueError(f"height must be divisible by 32, got {height}")
+ elif height is not None and not (256 <= height <= 1440):
+ raise ValueError(f"height must be between 256 and 1440, got {height}")
+ elif num_steps is not None and not (1 <= num_steps <= 50):
+ raise ValueError(f"steps must be between 1 and 50, got {num_steps}")
+ elif guidance is not None and not (1.5 <= guidance <= 5.0):
+ raise ValueError(f"guidance must be between 1.5 and 4, got {guidance}")
+ elif interval is not None and not (1.0 <= interval <= 4.0):
+ raise ValueError(f"interval must be between 1 and 4, got {interval}")
+ elif safety_tolerance is not None and not (0 <= safety_tolerance <= 6.0):
+ raise ValueError(f"safety_tolerance must be between 0 and 6, got {interval}")
+
+ if name == "flux.1-dev":
+ if interval is not None:
+ raise ValueError("Interval is not supported for flux.1-dev")
+ if name == "flux.1.1-pro":
+ if interval is not None or num_steps is not None or guidance is not None:
+ raise ValueError("Interval, num_steps and guidance are not supported for " "flux.1.1-pro")
+
+ self.name = name
+ self.request_json = {
+ "prompt": prompt,
+ "width": width,
+ "height": height,
+ "steps": num_steps,
+ "prompt_upsampling": prompt_upsampling,
+ "seed": seed,
+ "guidance": guidance,
+ "interval": interval,
+ "safety_tolerance": safety_tolerance,
+ }
+ self.request_json = {key: value for key, value in self.request_json.items() if value is not None}
+
+ self.request_id: str | None = None
+ self.result: dict | None = None
+ self._image_bytes: bytes | None = None
+ self._url: str | None = None
+ if api_key is None:
+ self.api_key = os.environ.get("BFL_API_KEY")
+ else:
+ self.api_key = api_key
+
+ if launch:
+ self.request()
+
+ def request(self):
+ """
+ Request to generate the image.
+ """
+ if self.request_id is not None:
+ return
+ response = requests.post(
+ f"{API_URL}/v1/{API_ENDPOINTS[self.name]}",
+ headers={
+ "accept": "application/json",
+ "x-key": self.api_key,
+ "Content-Type": "application/json",
+ },
+ json=self.request_json,
+ )
+ result = response.json()
+ if response.status_code != 200:
+ raise ApiException(status_code=response.status_code, detail=result.get("detail"))
+ self.request_id = result["id"]
+
+ def retrieve(self) -> dict:
+ """
+ Wait for the generation to finish and retrieve response.
+ """
+ if self.request_id is None:
+ self.request()
+ while self.result is None:
+ response = requests.get(
+ f"{API_URL}/v1/get_result",
+ headers={
+ "accept": "application/json",
+ "x-key": self.api_key,
+ },
+ params={
+ "id": self.request_id,
+ },
+ )
+ result = response.json()
+ if "status" not in result:
+ raise ApiException(status_code=response.status_code, detail=result.get("detail"))
+ elif result["status"] == "Ready":
+ self.result = result["result"]
+ elif result["status"] == "Pending":
+ time.sleep(0.5)
+ else:
+ raise ApiException(status_code=200, detail=f"API returned status '{result['status']}'")
+ return self.result
+
+ @property
+ def bytes(self) -> bytes:
+ """
+ Generated image as bytes.
+ """
+ if self._image_bytes is None:
+ response = requests.get(self.url)
+ if response.status_code == 200:
+ self._image_bytes = response.content
+ else:
+ raise ApiException(status_code=response.status_code)
+ return self._image_bytes
+
+ @property
+ def url(self) -> str:
+ """
+ Public url to retrieve the image from
+ """
+ if self._url is None:
+ result = self.retrieve()
+ self._url = result["sample"]
+ return self._url
+
+ @property
+ def image(self) -> Image.Image:
+ """
+ Load the image as a PIL Image
+ """
+ return Image.open(io.BytesIO(self.bytes))
+
+ def save(self, path: str):
+ """
+ Save the generated image to a local path
+ """
+ suffix = Path(self.url).suffix
+ if not path.endswith(suffix):
+ path = path + suffix
+ Path(path).resolve().parent.mkdir(parents=True, exist_ok=True)
+ with open(path, "wb") as file:
+ file.write(self.bytes)
+
+
+if __name__ == "__main__":
+ from fire import Fire
+
+ Fire(ImageRequest)
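+
+
+# Usage sketch (assumes a valid key in the BFL_API_KEY environment variable):
+#
+#   request = ImageRequest(prompt="a cup of coffee", width=1024, height=768)
+#   print(request.url)          # public URL of the generated sample
+#   request.save("coffee.jpg")  # the URL's suffix is appended if missing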
diff --git a/flux/cli.py b/flux/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..e844c765d5e2a969cfff53109ff745b0cb8edf5c
--- /dev/null
+++ b/flux/cli.py
@@ -0,0 +1,238 @@
+import os
+import re
+import time
+from dataclasses import dataclass
+from glob import iglob
+
+import torch
+from fire import Fire
+from transformers import pipeline
+
+from flux.sampling import denoise, get_noise, get_schedule, prepare, unpack
+from flux.util import configs, load_ae, load_clip, load_flow_model, load_t5, save_image
+
+NSFW_THRESHOLD = 0.85
+
+
+@dataclass
+class SamplingOptions:
+ prompt: str
+ width: int
+ height: int
+ num_steps: int
+ guidance: float
+ seed: int | None
+
+
+def parse_prompt(options: SamplingOptions) -> SamplingOptions | None:
+ user_question = "Next prompt (write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Either write your prompt directly, leave this field empty "
+ "to repeat the prompt or write a command starting with a slash:\n"
+ "- '/w ' will set the width of the generated image\n"
+ "- '/h ' will set the height of the generated image\n"
+ "- '/s ' sets the next seed\n"
+ "- '/g ' sets the guidance (flux-dev only)\n"
+ "- '/n ' sets the number of steps\n"
+ "- '/q' to quit"
+ )
+
+ while (prompt := input(user_question)).startswith("/"):
+ if prompt.startswith("/w"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, width = prompt.split()
+ options.width = 16 * (int(width) // 16)
+ print(
+ f"Setting resolution to {options.width} x {options.height} "
+ f"({options.height *options.width/1e6:.2f}MP)"
+ )
+ elif prompt.startswith("/h"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, height = prompt.split()
+ options.height = 16 * (int(height) // 16)
+ print(
+ f"Setting resolution to {options.width} x {options.height} "
+ f"({options.height *options.width/1e6:.2f}MP)"
+ )
+ elif prompt.startswith("/g"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, guidance = prompt.split()
+ options.guidance = float(guidance)
+ print(f"Setting guidance to {options.guidance}")
+ elif prompt.startswith("/s"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, seed = prompt.split()
+ options.seed = int(seed)
+ print(f"Setting seed to {options.seed}")
+ elif prompt.startswith("/n"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, steps = prompt.split()
+ options.num_steps = int(steps)
+ print(f"Setting number of steps to {options.num_steps}")
+ elif prompt.startswith("/q"):
+ print("Quitting")
+ return None
+ else:
+ if not prompt.startswith("/h"):
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ print(usage)
+ if prompt != "":
+ options.prompt = prompt
+ return options
+
+
+@torch.inference_mode()
+def main(
+ name: str = "flux-schnell",
+ width: int = 1360,
+ height: int = 768,
+ seed: int | None = None,
+ prompt: str = (
+ "a photo of a forest with mist swirling around the tree trunks. The word "
+ '"FLUX" is painted over it in big, red brush strokes with visible texture'
+ ),
+ device: str = "cuda" if torch.cuda.is_available() else "cpu",
+ num_steps: int | None = None,
+ loop: bool = False,
+ guidance: float = 3.5,
+ offload: bool = False,
+ output_dir: str = "output",
+ add_sampling_metadata: bool = True,
+):
+ """
+ Sample the flux model. Either interactively (set `--loop`) or run for a
+ single image.
+
+ Args:
+ name: Name of the model to load
+ height: height of the sample in pixels (should be a multiple of 16)
+ width: width of the sample in pixels (should be a multiple of 16)
+ seed: Set a seed for sampling
+ output_dir: directory to save the output images; files are named
+ img_{idx}.jpg, where {idx} is the sample index
+ prompt: Prompt used for sampling
+ device: Pytorch device
+ num_steps: number of sampling steps (default 4 for schnell, 50 for guidance distilled)
+ loop: start an interactive session and sample multiple times
+ guidance: guidance value used for guidance distillation
+ add_sampling_metadata: Add the prompt to the image Exif metadata
+ """
+ nsfw_classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection", device=device)
+
+ if name not in configs:
+ available = ", ".join(configs.keys())
+ raise ValueError(f"Got unknown model name: {name}, chose from {available}")
+
+ torch_device = torch.device(device)
+ if num_steps is None:
+ num_steps = 4 if name == "flux-schnell" else 50
+
+ # allow for packing and conversion to latent space
+ height = 16 * (height // 16)
+ width = 16 * (width // 16)
+
+ output_name = os.path.join(output_dir, "img_{idx}.jpg")
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ idx = 0
+ else:
+ fns = [fn for fn in iglob(output_name.format(idx="*")) if re.search(r"img_[0-9]+\.jpg$", fn)]
+ if len(fns) > 0:
+ idx = max(int(fn.split("_")[-1].split(".")[0]) for fn in fns) + 1
+ else:
+ idx = 0
+
+ # init all components
+ t5 = load_t5(torch_device, max_length=256 if name == "flux-schnell" else 512)
+ clip = load_clip(torch_device)
+ model = load_flow_model(name, device="cpu" if offload else torch_device)
+ ae = load_ae(name, device="cpu" if offload else torch_device)
+
+ rng = torch.Generator(device="cpu")
+ opts = SamplingOptions(
+ prompt=prompt,
+ width=width,
+ height=height,
+ num_steps=num_steps,
+ guidance=guidance,
+ seed=seed,
+ )
+
+ if loop:
+ opts = parse_prompt(opts)
+
+ while opts is not None:
+ if opts.seed is None:
+ opts.seed = rng.seed()
+ print(f"Generating with seed {opts.seed}:\n{opts.prompt}")
+ t0 = time.perf_counter()
+
+ # prepare input
+ x = get_noise(
+ 1,
+ opts.height,
+ opts.width,
+ device=torch_device,
+ dtype=torch.bfloat16,
+ seed=opts.seed,
+ )
+ opts.seed = None
+ if offload:
+ ae = ae.cpu()
+ torch.cuda.empty_cache()
+ t5, clip = t5.to(torch_device), clip.to(torch_device)
+ inp = prepare(t5, clip, x, prompt=opts.prompt)
+ timesteps = get_schedule(opts.num_steps, inp["img"].shape[1], shift=(name != "flux-schnell"))
+
+ # offload TEs to CPU, load model to gpu
+ if offload:
+ t5, clip = t5.cpu(), clip.cpu()
+ torch.cuda.empty_cache()
+ model = model.to(torch_device)
+
+ # denoise initial noise
+ x = denoise(model, **inp, timesteps=timesteps, guidance=opts.guidance)
+
+ # offload model, load autoencoder to gpu
+ if offload:
+ model.cpu()
+ torch.cuda.empty_cache()
+ ae.decoder.to(x.device)
+
+ # decode latents to pixel space
+ x = unpack(x.float(), opts.height, opts.width)
+ with torch.autocast(device_type=torch_device.type, dtype=torch.bfloat16):
+ x = ae.decode(x)
+
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+ t1 = time.perf_counter()
+
+ fn = output_name.format(idx=idx)
+ print(f"Done in {t1 - t0:.1f}s. Saving {fn}")
+
+ idx = save_image(nsfw_classifier, name, output_name, idx, x, add_sampling_metadata, prompt)
+
+ if loop:
+ print("-" * 80)
+ opts = parse_prompt(opts)
+ else:
+ opts = None
+
+
+def app():
+ Fire(main)
+
+
+if __name__ == "__main__":
+ app()
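+
+
+# Example invocations (sketch; Fire exposes main()'s arguments as CLI flags):
+#
+#   python -m flux --name flux-schnell --width 1024 --height 768 \
+#       --prompt "a forest in the mist"
+#   python -m flux --name flux-dev --loop  # interactive session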
diff --git a/flux/cli_control.py b/flux/cli_control.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd83c89eee13639faa70f3dc2fa616abc5aabb15
--- /dev/null
+++ b/flux/cli_control.py
@@ -0,0 +1,347 @@
+import os
+import re
+import time
+from dataclasses import dataclass
+from glob import iglob
+
+import torch
+from fire import Fire
+from transformers import pipeline
+
+from flux.modules.image_embedders import CannyImageEncoder, DepthImageEncoder
+from flux.sampling import denoise, get_noise, get_schedule, prepare_control, unpack
+from flux.util import configs, load_ae, load_clip, load_flow_model, load_t5, save_image
+
+
+@dataclass
+class SamplingOptions:
+ prompt: str
+ width: int
+ height: int
+ num_steps: int
+ guidance: float
+ seed: int | None
+ img_cond_path: str
+ lora_scale: float | None
+
+
+def parse_prompt(options: SamplingOptions) -> SamplingOptions | None:
+ user_question = "Next prompt (write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Either write your prompt directly, leave this field empty "
+ "to repeat the prompt or write a command starting with a slash:\n"
+ "- '/w ' will set the width of the generated image\n"
+ "- '/h ' will set the height of the generated image\n"
+ "- '/s ' sets the next seed\n"
+ "- '/g ' sets the guidance (flux-dev only)\n"
+ "- '/n ' sets the number of steps\n"
+ "- '/q' to quit"
+ )
+
+ while (prompt := input(user_question)).startswith("/"):
+ if prompt.startswith("/w"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, width = prompt.split()
+ options.width = 16 * (int(width) // 16)
+ print(
+ f"Setting resolution to {options.width} x {options.height} "
+ f"({options.height *options.width/1e6:.2f}MP)"
+ )
+ elif prompt.startswith("/h"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, height = prompt.split()
+ options.height = 16 * (int(height) // 16)
+ print(
+ f"Setting resolution to {options.width} x {options.height} "
+ f"({options.height *options.width/1e6:.2f}MP)"
+ )
+ elif prompt.startswith("/g"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, guidance = prompt.split()
+ options.guidance = float(guidance)
+ print(f"Setting guidance to {options.guidance}")
+ elif prompt.startswith("/s"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, seed = prompt.split()
+ options.seed = int(seed)
+ print(f"Setting seed to {options.seed}")
+ elif prompt.startswith("/n"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, steps = prompt.split()
+ options.num_steps = int(steps)
+ print(f"Setting number of steps to {options.num_steps}")
+ elif prompt.startswith("/q"):
+ print("Quitting")
+ return None
+ else:
+ if not prompt.startswith("/h"):
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ print(usage)
+ if prompt != "":
+ options.prompt = prompt
+ return options
+
+
+def parse_img_cond_path(options: SamplingOptions | None) -> SamplingOptions | None:
+ if options is None:
+ return None
+
+ user_question = "Next conditioning image (write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Either write your prompt directly, leave this field empty "
+ "to repeat the conditioning image or write a command starting with a slash:\n"
+ "- '/q' to quit"
+ )
+
+ while True:
+ img_cond_path = input(user_question)
+
+ if img_cond_path.startswith("/"):
+ if img_cond_path.startswith("/q"):
+ print("Quitting")
+ return None
+ else:
+ if not img_cond_path.startswith("/h"):
+ print(f"Got invalid command '{img_cond_path}'\n{usage}")
+ print(usage)
+ continue
+
+ if img_cond_path == "":
+ break
+
+ if not os.path.isfile(img_cond_path) or not img_cond_path.lower().endswith(
+ (".jpg", ".jpeg", ".png", ".webp")
+ ):
+ print(f"File '{img_cond_path}' does not exist or is not a valid image file")
+ continue
+
+ options.img_cond_path = img_cond_path
+ break
+
+ return options
+
+
+def parse_lora_scale(options: SamplingOptions | None) -> tuple[SamplingOptions | None, bool]:
+ changed = False
+
+ if options is None:
+ return None, changed
+
+ user_question = "Next lora scale (write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Either write your prompt directly, leave this field empty "
+ "to repeat the lora scale or write a command starting with a slash:\n"
+ "- '/q' to quit"
+ )
+
+ while (prompt := input(user_question)).startswith("/"):
+ if prompt.startswith("/q"):
+ print("Quitting")
+ return None, changed
+ else:
+ if not prompt.startswith("/h"):
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ print(usage)
+ if prompt != "":
+ options.lora_scale = float(prompt)
+ changed = True
+ return options, changed
+
+
+@torch.inference_mode()
+def main(
+ name: str,
+ width: int = 1024,
+ height: int = 1024,
+ seed: int | None = None,
+ prompt: str = "a robot made out of gold",
+ device: str = "cuda" if torch.cuda.is_available() else "cpu",
+ num_steps: int = 50,
+ loop: bool = False,
+ guidance: float | None = None,
+ offload: bool = False,
+ output_dir: str = "output",
+ add_sampling_metadata: bool = True,
+ img_cond_path: str = "assets/robot.webp",
+ lora_scale: float | None = 0.85,
+):
+ """
+ Sample the flux model. Either interactively (set `--loop`) or run for a
+ single image.
+
+ Args:
+ name: Name of the model to load
+ height: height of the sample in pixels (should be a multiple of 16)
+ width: width of the sample in pixels (should be a multiple of 16)
+ seed: Set a seed for sampling
+ output_dir: directory to save the output images; files are named
+ img_{idx}.jpg, where {idx} is the sample index
+ prompt: Prompt used for sampling
+ device: Pytorch device
+ num_steps: number of sampling steps (default 4 for schnell, 50 for guidance distilled)
+ loop: start an interactive session and sample multiple times
+ guidance: guidance value used for guidance distillation
+ add_sampling_metadata: Add the prompt to the image Exif metadata
+ img_cond_path: path to conditioning image (jpeg/png/webp)
+ lora_scale: scale applied to the LoRA modules (-lora variants only)
+ """
+ nsfw_classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection", device=device)
+
+ assert name in [
+ "flux-dev-canny",
+ "flux-dev-depth",
+ "flux-dev-canny-lora",
+ "flux-dev-depth-lora",
+ ], f"Got unknown model name: {name}"
+ if guidance is None:
+ if name in ["flux-dev-canny", "flux-dev-canny-lora"]:
+ guidance = 30.0
+ elif name in ["flux-dev-depth", "flux-dev-depth-lora"]:
+ guidance = 10.0
+ else:
+ raise NotImplementedError()
+
+ if name not in configs:
+ available = ", ".join(configs.keys())
+ raise ValueError(f"Got unknown model name: {name}, chose from {available}")
+
+ torch_device = torch.device(device)
+
+ output_name = os.path.join(output_dir, "img_{idx}.jpg")
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ idx = 0
+ else:
+ fns = [fn for fn in iglob(output_name.format(idx="*")) if re.search(r"img_[0-9]+\.jpg$", fn)]
+ if len(fns) > 0:
+ idx = max(int(fn.split("_")[-1].split(".")[0]) for fn in fns) + 1
+ else:
+ idx = 0
+
+ # init all components
+ t5 = load_t5(torch_device, max_length=512)
+ clip = load_clip(torch_device)
+ model = load_flow_model(name, device="cpu" if offload else torch_device)
+ ae = load_ae(name, device="cpu" if offload else torch_device)
+
+ # set lora scale
+ if "lora" in name and lora_scale is not None:
+ for _, module in model.named_modules():
+ if hasattr(module, "set_scale"):
+ module.set_scale(lora_scale)
+
+ if name in ["flux-dev-depth", "flux-dev-depth-lora"]:
+ img_embedder = DepthImageEncoder(torch_device)
+ elif name in ["flux-dev-canny", "flux-dev-canny-lora"]:
+ img_embedder = CannyImageEncoder(torch_device)
+ else:
+ raise NotImplementedError()
+
+ rng = torch.Generator(device="cpu")
+ opts = SamplingOptions(
+ prompt=prompt,
+ width=width,
+ height=height,
+ num_steps=num_steps,
+ guidance=guidance,
+ seed=seed,
+ img_cond_path=img_cond_path,
+ lora_scale=lora_scale,
+ )
+
+ if loop:
+ opts = parse_prompt(opts)
+ opts = parse_img_cond_path(opts)
+ if "lora" in name:
+ opts, changed = parse_lora_scale(opts)
+ if changed:
+ # update the lora scale:
+ for _, module in model.named_modules():
+ if hasattr(module, "set_scale"):
+ module.set_scale(opts.lora_scale)
+
+ while opts is not None:
+ if opts.seed is None:
+ opts.seed = rng.seed()
+ print(f"Generating with seed {opts.seed}:\n{opts.prompt}")
+ t0 = time.perf_counter()
+
+ # prepare input
+ x = get_noise(
+ 1,
+ opts.height,
+ opts.width,
+ device=torch_device,
+ dtype=torch.bfloat16,
+ seed=opts.seed,
+ )
+ opts.seed = None
+ if offload:
+ t5, clip, ae = t5.to(torch_device), clip.to(torch_device), ae.to(torch_device)
+ inp = prepare_control(
+ t5,
+ clip,
+ x,
+ prompt=opts.prompt,
+ ae=ae,
+ encoder=img_embedder,
+ img_cond_path=opts.img_cond_path,
+ )
+ timesteps = get_schedule(opts.num_steps, inp["img"].shape[1], shift=(name != "flux-schnell"))
+
+ # offload TEs and AE to CPU, load model to gpu
+ if offload:
+ t5, clip, ae = t5.cpu(), clip.cpu(), ae.cpu()
+ torch.cuda.empty_cache()
+ model = model.to(torch_device)
+
+ # denoise initial noise
+ x = denoise(model, **inp, timesteps=timesteps, guidance=opts.guidance)
+
+ # offload model, load autoencoder to gpu
+ if offload:
+ model.cpu()
+ torch.cuda.empty_cache()
+ ae.decoder.to(x.device)
+
+ # decode latents to pixel space
+ x = unpack(x.float(), opts.height, opts.width)
+ with torch.autocast(device_type=torch_device.type, dtype=torch.bfloat16):
+ x = ae.decode(x)
+
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+ t1 = time.perf_counter()
+ print(f"Done in {t1 - t0:.1f}s")
+
+ idx = save_image(nsfw_classifier, name, output_name, idx, x, add_sampling_metadata, prompt)
+
+ if loop:
+ print("-" * 80)
+ opts = parse_prompt(opts)
+ opts = parse_img_cond_path(opts)
+ if "lora" in name:
+ opts, changed = parse_lora_scale(opts)
+ if changed:
+ # update the lora scale:
+ for _, module in model.named_modules():
+ if hasattr(module, "set_scale"):
+ module.set_scale(opts.lora_scale)
+ else:
+ opts = None
+
+
+def app():
+ Fire(main)
+
+
+if __name__ == "__main__":
+ app()
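+
+
+# Example invocation (sketch; `name` is required and must be one of the
+# canny/depth variants asserted above):
+#
+#   python -m flux.cli_control --name flux-dev-depth \
+#       --img_cond_path assets/robot.webp --prompt "a robot made out of gold"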
diff --git a/flux/cli_fill.py b/flux/cli_fill.py
new file mode 100644
index 0000000000000000000000000000000000000000..415c04201e6dc13de1e343df43139e4e2ac82065
--- /dev/null
+++ b/flux/cli_fill.py
@@ -0,0 +1,334 @@
+import os
+import re
+import time
+from dataclasses import dataclass
+from glob import iglob
+
+import torch
+from fire import Fire
+from PIL import Image
+from transformers import pipeline
+
+from flux.sampling import denoise, get_noise, get_schedule, prepare_fill, unpack
+from flux.util import configs, load_ae, load_clip, load_flow_model, load_t5, save_image
+
+
+@dataclass
+class SamplingOptions:
+ prompt: str
+ width: int
+ height: int
+ num_steps: int
+ guidance: float
+ seed: int | None
+ img_cond_path: str
+ img_mask_path: str
+
+
+def parse_prompt(options: SamplingOptions) -> SamplingOptions | None:
+ user_question = "Next prompt (write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Either write your prompt directly, leave this field empty "
+ "to repeat the prompt or write a command starting with a slash:\n"
+ "- '/s ' sets the next seed\n"
+ "- '/g ' sets the guidance (flux-dev only)\n"
+ "- '/n ' sets the number of steps\n"
+ "- '/q' to quit"
+ )
+
+ while (prompt := input(user_question)).startswith("/"):
+ if prompt.startswith("/g"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, guidance = prompt.split()
+ options.guidance = float(guidance)
+ print(f"Setting guidance to {options.guidance}")
+ elif prompt.startswith("/s"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, seed = prompt.split()
+ options.seed = int(seed)
+ print(f"Setting seed to {options.seed}")
+ elif prompt.startswith("/n"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, steps = prompt.split()
+ options.num_steps = int(steps)
+ print(f"Setting number of steps to {options.num_steps}")
+ elif prompt.startswith("/q"):
+ print("Quitting")
+ return None
+ else:
+ if not prompt.startswith("/h"):
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ print(usage)
+ if prompt != "":
+ options.prompt = prompt
+ return options
+
+
+def parse_img_cond_path(options: SamplingOptions | None) -> SamplingOptions | None:
+ if options is None:
+ return None
+
+ user_question = "Next conditioning image (write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Either write your prompt directly, leave this field empty "
+ "to repeat the conditioning image or write a command starting with a slash:\n"
+ "- '/q' to quit"
+ )
+
+ while True:
+ img_cond_path = input(user_question)
+
+ if img_cond_path.startswith("/"):
+ if img_cond_path.startswith("/q"):
+ print("Quitting")
+ return None
+ else:
+ if not img_cond_path.startswith("/h"):
+ print(f"Got invalid command '{img_cond_path}'\n{usage}")
+ print(usage)
+ continue
+
+ if img_cond_path == "":
+ break
+
+ if not os.path.isfile(img_cond_path) or not img_cond_path.lower().endswith(
+ (".jpg", ".jpeg", ".png", ".webp")
+ ):
+ print(f"File '{img_cond_path}' does not exist or is not a valid image file")
+ continue
+ else:
+ with Image.open(img_cond_path) as img:
+ width, height = img.size
+
+ if width % 32 != 0 or height % 32 != 0:
+ print(f"Image dimensions must be divisible by 32, got {width}x{height}")
+ continue
+
+ options.img_cond_path = img_cond_path
+ break
+
+ return options
+
+
+def parse_img_mask_path(options: SamplingOptions | None) -> SamplingOptions | None:
+ if options is None:
+ return None
+
+ user_question = "Next conditioning mask (write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Either write your prompt directly, leave this field empty "
+ "to repeat the conditioning mask or write a command starting with a slash:\n"
+ "- '/q' to quit"
+ )
+
+ while True:
+ img_mask_path = input(user_question)
+
+ if img_mask_path.startswith("/"):
+ if img_mask_path.startswith("/q"):
+ print("Quitting")
+ return None
+ else:
+ if not img_mask_path.startswith("/h"):
+ print(f"Got invalid command '{img_mask_path}'\n{usage}")
+ print(usage)
+ continue
+
+ if img_mask_path == "":
+ break
+
+ if not os.path.isfile(img_mask_path) or not img_mask_path.lower().endswith(
+ (".jpg", ".jpeg", ".png", ".webp")
+ ):
+ print(f"File '{img_mask_path}' does not exist or is not a valid image file")
+ continue
+ else:
+ with Image.open(img_mask_path) as img:
+ width, height = img.size
+
+ if width % 32 != 0 or height % 32 != 0:
+ print(f"Image dimensions must be divisible by 32, got {width}x{height}")
+ continue
+ else:
+ with Image.open(options.img_cond_path) as img_cond:
+ img_cond_width, img_cond_height = img_cond.size
+
+ if width != img_cond_width or height != img_cond_height:
+ print(
+ f"Mask dimensions must match conditioning image, got {width}x{height} and {img_cond_width}x{img_cond_height}"
+ )
+ continue
+
+ options.img_mask_path = img_mask_path
+ break
+
+ return options
+
+
+@torch.inference_mode()
+def main(
+ seed: int | None = None,
+ prompt: str = "a white paper cup",
+ device: str = "cuda" if torch.cuda.is_available() else "cpu",
+ num_steps: int = 50,
+ loop: bool = False,
+ guidance: float = 30.0,
+ offload: bool = False,
+ output_dir: str = "output",
+ add_sampling_metadata: bool = True,
+ img_cond_path: str = "assets/cup.png",
+ img_mask_path: str = "assets/cup_mask.png",
+):
+ """
+ Sample the flux model. Either interactively (set `--loop`) or run for a
+ single image. This demo assumes that the conditioning image and mask have
+ the same shape and that height and width are divisible by 32.
+
+ Args:
+ seed: Set a seed for sampling
+ output_dir: directory to save the output images; files are named
+ img_{idx}.jpg, where {idx} is the sample index
+ prompt: Prompt used for sampling
+ device: Pytorch device
+ num_steps: number of sampling steps (default 4 for schnell, 50 for guidance distilled)
+ loop: start an interactive session and sample multiple times
+ guidance: guidance value used for guidance distillation
+ add_sampling_metadata: Add the prompt to the image Exif metadata
+ img_cond_path: path to conditioning image (jpeg/png/webp)
+ img_mask_path: path to conditioning mask (jpeg/png/webp)
+ """
+ nsfw_classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection", device=device)
+
+ name = "flux-dev-fill"
+ if name not in configs:
+ available = ", ".join(configs.keys())
+ raise ValueError(f"Got unknown model name: {name}, chose from {available}")
+
+ torch_device = torch.device(device)
+
+ output_name = os.path.join(output_dir, "img_{idx}.jpg")
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ idx = 0
+ else:
+ fns = [fn for fn in iglob(output_name.format(idx="*")) if re.search(r"img_[0-9]+\.jpg$", fn)]
+ if len(fns) > 0:
+ idx = max(int(fn.split("_")[-1].split(".")[0]) for fn in fns) + 1
+ else:
+ idx = 0
+
+ # init all components
+ t5 = load_t5(torch_device, max_length=128)
+ clip = load_clip(torch_device)
+ model = load_flow_model(name, device="cpu" if offload else torch_device)
+ ae = load_ae(name, device="cpu" if offload else torch_device)
+
+ rng = torch.Generator(device="cpu")
+ with Image.open(img_cond_path) as img:
+ width, height = img.size
+ opts = SamplingOptions(
+ prompt=prompt,
+ width=width,
+ height=height,
+ num_steps=num_steps,
+ guidance=guidance,
+ seed=seed,
+ img_cond_path=img_cond_path,
+ img_mask_path=img_mask_path,
+ )
+
+ if loop:
+ opts = parse_prompt(opts)
+ opts = parse_img_cond_path(opts)
+
+ with Image.open(opts.img_cond_path) as img:
+ width, height = img.size
+ opts.height = height
+ opts.width = width
+
+ opts = parse_img_mask_path(opts)
+
+ while opts is not None:
+ if opts.seed is None:
+ opts.seed = rng.seed()
+ print(f"Generating with seed {opts.seed}:\n{opts.prompt}")
+ t0 = time.perf_counter()
+
+ # prepare input
+ x = get_noise(
+ 1,
+ opts.height,
+ opts.width,
+ device=torch_device,
+ dtype=torch.bfloat16,
+ seed=opts.seed,
+ )
+ opts.seed = None
+ if offload:
+ t5, clip, ae = t5.to(torch_device), clip.to(torch_device), ae.to(torch_device)
+ inp = prepare_fill(
+ t5,
+ clip,
+ x,
+ prompt=opts.prompt,
+ ae=ae,
+ img_cond_path=opts.img_cond_path,
+ mask_path=opts.img_mask_path,
+ )
+
+ timesteps = get_schedule(opts.num_steps, inp["img"].shape[1], shift=(name != "flux-schnell"))
+
+ # offload TEs and AE to CPU, load model to gpu
+ if offload:
+ t5, clip, ae = t5.cpu(), clip.cpu(), ae.cpu()
+ torch.cuda.empty_cache()
+ model = model.to(torch_device)
+
+ # denoise initial noise
+ x = denoise(model, **inp, timesteps=timesteps, guidance=opts.guidance)
+
+ # offload model, load autoencoder to gpu
+ if offload:
+ model.cpu()
+ torch.cuda.empty_cache()
+ ae.decoder.to(x.device)
+
+ # decode latents to pixel space
+ x = unpack(x.float(), opts.height, opts.width)
+ with torch.autocast(device_type=torch_device.type, dtype=torch.bfloat16):
+ x = ae.decode(x)
+
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+ t1 = time.perf_counter()
+ print(f"Done in {t1 - t0:.1f}s")
+
+ idx = save_image(nsfw_classifier, name, output_name, idx, x, add_sampling_metadata, prompt)
+
+ if loop:
+ print("-" * 80)
+ opts = parse_prompt(opts)
+ opts = parse_img_cond_path(opts)
+
+ with Image.open(opts.img_cond_path) as img:
+ width, height = img.size
+ opts.height = height
+ opts.width = width
+
+ opts = parse_img_mask_path(opts)
+ else:
+ opts = None
+
+
+def app():
+ Fire(main)
+
+
+if __name__ == "__main__":
+ app()
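+
+
+# Example invocation (sketch; the conditioning image and mask must have the
+# same dimensions, both divisible by 32):
+#
+#   python -m flux.cli_fill --prompt "a white paper cup" \
+#       --img_cond_path assets/cup.png --img_mask_path assets/cup_mask.png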
diff --git a/flux/cli_redux.py b/flux/cli_redux.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c03435a2809824905ae5779a89a2ba879cbd03f
--- /dev/null
+++ b/flux/cli_redux.py
@@ -0,0 +1,279 @@
+import os
+import re
+import time
+from dataclasses import dataclass
+from glob import iglob
+
+import torch
+from fire import Fire
+from transformers import pipeline
+
+from flux.modules.image_embedders import ReduxImageEncoder
+from flux.sampling import denoise, get_noise, get_schedule, prepare_redux, unpack
+from flux.util import configs, load_ae, load_clip, load_flow_model, load_t5, save_image
+
+
+@dataclass
+class SamplingOptions:
+ prompt: str
+ width: int
+ height: int
+ num_steps: int
+ guidance: float
+ seed: int | None
+ img_cond_path: str
+
+
+def parse_prompt(options: SamplingOptions) -> SamplingOptions | None:
+ user_question = "Write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Leave this field empty to do nothing "
+ "or write a command starting with a slash:\n"
+ "- '/w ' will set the width of the generated image\n"
+ "- '/h ' will set the height of the generated image\n"
+ "- '/s ' sets the next seed\n"
+ "- '/g ' sets the guidance (flux-dev only)\n"
+ "- '/n ' sets the number of steps\n"
+ "- '/q' to quit"
+ )
+
+ while (prompt := input(user_question)).startswith("/"):
+ if prompt.startswith("/w"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, width = prompt.split()
+ options.width = 16 * (int(width) // 16)
+ print(
+ f"Setting resolution to {options.width} x {options.height} "
+ f"({options.height *options.width/1e6:.2f}MP)"
+ )
+ elif prompt.startswith("/h"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, height = prompt.split()
+ options.height = 16 * (int(height) // 16)
+ print(
+ f"Setting resolution to {options.width} x {options.height} "
+ f"({options.height *options.width/1e6:.2f}MP)"
+ )
+ elif prompt.startswith("/g"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, guidance = prompt.split()
+ options.guidance = float(guidance)
+ print(f"Setting guidance to {options.guidance}")
+ elif prompt.startswith("/s"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, seed = prompt.split()
+ options.seed = int(seed)
+ print(f"Setting seed to {options.seed}")
+ elif prompt.startswith("/n"):
+ if prompt.count(" ") != 1:
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ continue
+ _, steps = prompt.split()
+ options.num_steps = int(steps)
+ print(f"Setting number of steps to {options.num_steps}")
+ elif prompt.startswith("/q"):
+ print("Quitting")
+ return None
+ else:
+ if not prompt.startswith("/h"):
+ print(f"Got invalid command '{prompt}'\n{usage}")
+ print(usage)
+ return options
+
+
+def parse_img_cond_path(options: SamplingOptions | None) -> SamplingOptions | None:
+ if options is None:
+ return None
+
+ user_question = "Next conditioning image (write /h for help, /q to quit and leave empty to repeat):\n"
+ usage = (
+ "Usage: Either write your prompt directly, leave this field empty "
+ "to repeat the conditioning image or write a command starting with a slash:\n"
+ "- '/q' to quit"
+ )
+
+ while True:
+ img_cond_path = input(user_question)
+
+ if img_cond_path.startswith("/"):
+ if img_cond_path.startswith("/q"):
+ print("Quitting")
+ return None
+ else:
+ if not img_cond_path.startswith("/h"):
+ print(f"Got invalid command '{img_cond_path}'\n{usage}")
+ print(usage)
+ continue
+
+ if img_cond_path == "":
+ break
+
+ if not os.path.isfile(img_cond_path) or not img_cond_path.lower().endswith(
+ (".jpg", ".jpeg", ".png", ".webp")
+ ):
+ print(f"File '{img_cond_path}' does not exist or is not a valid image file")
+ continue
+
+ options.img_cond_path = img_cond_path
+ break
+
+ return options
+
+
+@torch.inference_mode()
+def main(
+ name: str = "flux-dev",
+ width: int = 1360,
+ height: int = 768,
+ seed: int | None = None,
+ device: str = "cuda" if torch.cuda.is_available() else "cpu",
+ num_steps: int | None = None,
+ loop: bool = False,
+ guidance: float = 2.5,
+ offload: bool = False,
+ output_dir: str = "output",
+ add_sampling_metadata: bool = True,
+ img_cond_path: str = "assets/robot.webp",
+):
+ """
+ Sample the flux model. Either interactively (set `--loop`) or run for a
+ single image.
+
+ Args:
+ name: Name of the model to load
+ height: height of the sample in pixels (should be a multiple of 16)
+ width: width of the sample in pixels (should be a multiple of 16)
+ seed: Set a seed for sampling
+ output_dir: directory to save the output images; files are named
+ img_{idx}.jpg, where {idx} is the sample index
+ device: Pytorch device
+ num_steps: number of sampling steps (default 4 for schnell, 50 for guidance distilled)
+ loop: start an interactive session and sample multiple times
+ guidance: guidance value used for guidance distillation
+ add_sampling_metadata: Add the prompt to the image Exif metadata
+ img_cond_path: path to conditioning image (jpeg/png/webp)
+ """
+ nsfw_classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection", device=device)
+
+ if name not in configs:
+ available = ", ".join(configs.keys())
+ raise ValueError(f"Got unknown model name: {name}, chose from {available}")
+
+ torch_device = torch.device(device)
+ if num_steps is None:
+ num_steps = 4 if name == "flux-schnell" else 50
+
+ output_name = os.path.join(output_dir, "img_{idx}.jpg")
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ idx = 0
+ else:
+ fns = [fn for fn in iglob(output_name.format(idx="*")) if re.search(r"img_[0-9]+\.jpg$", fn)]
+ if len(fns) > 0:
+ idx = max(int(fn.split("_")[-1].split(".")[0]) for fn in fns) + 1
+ else:
+ idx = 0
+
+ # init all components
+ t5 = load_t5(torch_device, max_length=256 if name == "flux-schnell" else 512)
+ clip = load_clip(torch_device)
+ model = load_flow_model(name, device="cpu" if offload else torch_device)
+ ae = load_ae(name, device="cpu" if offload else torch_device)
+ img_embedder = ReduxImageEncoder(torch_device)
+
+ rng = torch.Generator(device="cpu")
+ prompt = ""
+ opts = SamplingOptions(
+ prompt=prompt,
+ width=width,
+ height=height,
+ num_steps=num_steps,
+ guidance=guidance,
+ seed=seed,
+ img_cond_path=img_cond_path,
+ )
+
+ if loop:
+ opts = parse_prompt(opts)
+ opts = parse_img_cond_path(opts)
+
+ while opts is not None:
+ if opts.seed is None:
+ opts.seed = rng.seed()
+ print(f"Generating with seed {opts.seed}:\n{opts.prompt}")
+ t0 = time.perf_counter()
+
+ # prepare input
+ x = get_noise(
+ 1,
+ opts.height,
+ opts.width,
+ device=torch_device,
+ dtype=torch.bfloat16,
+ seed=opts.seed,
+ )
+ opts.seed = None
+ if offload:
+ ae = ae.cpu()
+ torch.cuda.empty_cache()
+ t5, clip = t5.to(torch_device), clip.to(torch_device)
+ inp = prepare_redux(
+ t5,
+ clip,
+ x,
+ prompt=opts.prompt,
+ encoder=img_embedder,
+ img_cond_path=opts.img_cond_path,
+ )
+ timesteps = get_schedule(opts.num_steps, inp["img"].shape[1], shift=(name != "flux-schnell"))
+
+ # offload TEs to CPU, load model to gpu
+ if offload:
+ t5, clip = t5.cpu(), clip.cpu()
+ torch.cuda.empty_cache()
+ model = model.to(torch_device)
+
+ # denoise initial noise
+ x = denoise(model, **inp, timesteps=timesteps, guidance=opts.guidance)
+
+ # offload model, load autoencoder to gpu
+ if offload:
+ model.cpu()
+ torch.cuda.empty_cache()
+ ae.decoder.to(x.device)
+
+ # decode latents to pixel space
+ x = unpack(x.float(), opts.height, opts.width)
+ with torch.autocast(device_type=torch_device.type, dtype=torch.bfloat16):
+ x = ae.decode(x)
+
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+ t1 = time.perf_counter()
+ print(f"Done in {t1 - t0:.1f}s")
+
+ idx = save_image(nsfw_classifier, name, output_name, idx, x, add_sampling_metadata, prompt)
+
+ if loop:
+ print("-" * 80)
+ opts = parse_prompt(opts)
+ opts = parse_img_cond_path(opts)
+ else:
+ opts = None
+
+
+def app():
+ Fire(main)
+
+
+if __name__ == "__main__":
+ app()
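+
+
+# Example invocation (sketch; Redux conditions on a reference image, so the
+# text prompt is left empty):
+#
+#   python -m flux.cli_redux --name flux-dev --img_cond_path assets/robot.webp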
diff --git a/flux/math.py b/flux/math.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bb88c1814eb68a7a0ea549fdcfaacfa77170668
--- /dev/null
+++ b/flux/math.py
@@ -0,0 +1,117 @@
+from einops import rearrange
+import torch
+from torch import Tensor
+import torch.nn.functional as F
+from flash_attn import flash_attn_varlen_func
+from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
+
+
+def _upad_input(query_layer, key_layer, value_layer, query_mask, key_mask, query_length):
+ def _get_unpad_data(attention_mask):
+ seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
+ indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
+ max_seqlen_in_batch = seqlens_in_batch.max().item()
+ cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
+ return (
+ indices,
+ cu_seqlens,
+ max_seqlen_in_batch,
+ )
+
+ indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(key_mask)
+ _, q_seq_len, num_query_heads, _ = query_layer.shape
+ batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape
+
+ key_layer = index_first_axis(
+ key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim),
+ indices_k,
+ )
+ value_layer = index_first_axis(
+ value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim),
+ indices_k,
+ )
+ if query_length == kv_seq_len and key_mask is None:
+ query_layer = index_first_axis(
+ query_layer.reshape(batch_size * kv_seq_len, num_query_heads, head_dim),
+ indices_k,
+ )
+ cu_seqlens_q = cu_seqlens_k
+ max_seqlen_in_batch_q = max_seqlen_in_batch_k
+ indices_q = indices_k
+ elif query_length == 1:
+ max_seqlen_in_batch_q = 1
+ cu_seqlens_q = torch.arange(
+ batch_size + 1, dtype=torch.int32, device=query_layer.device
+ ) # There is a memcpy here, that is very bad.
+ indices_q = cu_seqlens_q[:-1]
+ query_layer = query_layer.squeeze(1)
+ else:
+ # The -q_len: slice assumes left padding.
+ query_mask = query_mask[:, -query_length:]
+ query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q, _ = unpad_input(query_layer, query_mask)
+
+ return (
+ query_layer,
+ key_layer,
+ value_layer,
+ indices_q,
+ (cu_seqlens_q, cu_seqlens_k),
+ (max_seqlen_in_batch_q, max_seqlen_in_batch_k),
+ )
+
+
+def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, attn_mask: Tensor | None = None, drop_mask: Tensor | None = None) -> Tensor:
+ q, k = apply_rope(q, k, pe)
+
+ q = q.transpose(1, 2)
+ k = k.transpose(1, 2)
+ v = v.transpose(1, 2)
+ B, L, H, D = q.shape
+
+ if drop_mask is None: # todo: remove drop mask
+ drop_mask = attn_mask
+ (
+ query_states,
+ key_states,
+ value_states,
+ indices_q,
+ cu_seq_lens,
+ max_seq_lens,
+ ) = _upad_input(q, k, v, attn_mask, drop_mask, L)
+
+ cu_seqlens_q, cu_seqlens_k = cu_seq_lens
+ max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens
+
+ attn_output_unpad = flash_attn_varlen_func(
+ query_states,
+ key_states,
+ value_states,
+ cu_seqlens_q=cu_seqlens_q,
+ cu_seqlens_k=cu_seqlens_k,
+ max_seqlen_q=max_seqlen_in_batch_q,
+ max_seqlen_k=max_seqlen_in_batch_k,
+ dropout_p=0.0,
+ causal=False,
+ )
+ x = pad_input(attn_output_unpad, indices_q, B, L)
+ x = rearrange(x, "B L H D -> B L (H D)")
+
+ return x
+
+
+def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
+ assert dim % 2 == 0
+ scale = torch.arange(0, dim, 2, dtype=torch.float64, device=pos.device) / dim
+ omega = 1.0 / (theta**scale)
+ out = torch.einsum("...n,d->...nd", pos, omega)
+ out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
+ out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
+ return out.float()
+
+
+def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
+ xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
+ xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
+ xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
+ xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
+ return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
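+
+
+# Shape sketch (illustration only): for positions of shape (B, N) and an even
+# dimension D, rope(pos, D, theta) yields (B, N, D // 2, 2, 2) rotation
+# matrices; apply_rope rotates consecutive channel pairs of q and k while
+# preserving their shapes:
+#
+#   pos = torch.arange(8, dtype=torch.float64)[None]  # (1, 8)
+#   pe = rope(pos, dim=4, theta=10_000)               # (1, 8, 2, 2, 2)
+#   q = k = torch.randn(1, 1, 8, 4)                   # (B, H, N, D)
+#   q_rot, k_rot = apply_rope(q, k, pe[:, None])      # shapes unchanged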
diff --git a/flux/model.py b/flux/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d3ae1acbcced57a1a30e641c18aa8cae9a45bd5
--- /dev/null
+++ b/flux/model.py
@@ -0,0 +1,175 @@
+from dataclasses import dataclass
+from typing import List
+
+import torch
+from torch import Tensor, nn
+
+from flux.modules.layers import (
+ DoubleStreamBlock,
+ EmbedND,
+ LastLayer,
+ MLPEmbedder,
+ SingleStreamBlock,
+ timestep_embedding,
+)
+from flux.modules.lora import LinearLora, replace_linear_with_lora
+
+
+@dataclass
+class FluxParams:
+ in_channels: int
+ out_channels: int
+ vec_in_dim: int
+ context_in_dim: int
+ hidden_size: int
+ mlp_ratio: float
+ num_heads: int
+ depth: int
+ depth_single_blocks: int
+ axes_dim: list[int]
+ theta: int
+ qkv_bias: bool
+ guidance_embed: bool
+
+
+class Flux(nn.Module):
+ """
+ Transformer model for flow matching on sequences.
+ """
+
+ def __init__(self, params: FluxParams):
+ super().__init__()
+
+ self.params = params
+ self.in_channels = params.in_channels
+ self.out_channels = params.out_channels
+ if params.hidden_size % params.num_heads != 0:
+ raise ValueError(
+ f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}"
+ )
+ pe_dim = params.hidden_size // params.num_heads
+ if sum(params.axes_dim) != pe_dim:
+ raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}")
+ self.hidden_size = params.hidden_size
+ self.num_heads = params.num_heads
+ self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)
+ self.img_in = nn.Linear(self.in_channels, self.hidden_size, bias=True)
+ self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size)
+ self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size)
+ self.guidance_in = (
+ MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) if params.guidance_embed else nn.Identity()
+ )
+ self.txt_in = nn.Linear(params.context_in_dim, self.hidden_size)
+
+ self.double_blocks = nn.ModuleList(
+ [
+ DoubleStreamBlock(
+ self.hidden_size,
+ self.num_heads,
+ mlp_ratio=params.mlp_ratio,
+ qkv_bias=params.qkv_bias,
+ )
+ for _ in range(params.depth)
+ ]
+ )
+
+ self.single_blocks = nn.ModuleList(
+ [
+ SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio)
+ for _ in range(params.depth_single_blocks)
+ ]
+ )
+
+ self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels)
+
+ def forward(
+ self,
+ img: Tensor,
+ img_ids: Tensor,
+ txt: Tensor,
+ txt_ids: Tensor,
+ timesteps: Tensor,
+ y: Tensor,
+ txt_mask: Tensor | None = None,
+ img_mask: Tensor | None = None,
+ guidance: Tensor | None = None,
+ ) -> Tensor:
+ if img.ndim != 3 or txt.ndim != 3:
+ raise ValueError("Input img and txt tensors must have 3 dimensions.")
+
+ # running on sequences img
+ img = self.img_in(img)
+ vec = self.time_in(timestep_embedding(timesteps, 256))
+ if self.params.guidance_embed:
+ if guidance is None:
+ raise ValueError("Didn't get guidance strength for guidance distilled model.")
+ vec = vec + self.guidance_in(timestep_embedding(guidance, 256))
+ vec = vec + self.vector_in(y)
+ txt = self.txt_in(txt)
+
+ ids = torch.cat((txt_ids, img_ids), dim=1)
+ pe = self.pe_embedder(ids)
+
+ for block in self.double_blocks:
+ img, txt = block(img=img, txt=txt, vec=vec, pe=pe, img_mask=img_mask, txt_mask=txt_mask)
+
+ img = torch.cat((txt, img), 1)
+ attn_mask = torch.cat((txt_mask, img_mask), 1)
+ for block in self.single_blocks:
+ img = block(img, vec=vec, pe=pe, attn_mask=attn_mask)
+ img = img[:, txt.shape[1] :, ...]
+
+ img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels)
+ return img
+
+ def forward_with_cfg(
+ self,
+ img: Tensor,
+ img_ids: Tensor,
+ txt: Tensor,
+ txt_ids: Tensor,
+ timesteps: Tensor,
+ y: Tensor,
+ txt_mask: Tensor | None = None,
+ img_mask: Tensor | None = None,
+ guidance: Tensor | None = None,
+ cfg_scale: float = 1.0,
+ ) -> Tensor:
+ half = img[: len(img) // 2]
+ # duplicate the conditional half so the cond/uncond batches share one latent
+ combined = torch.cat([half, half], dim=0)
+ model_out = self.forward(combined, img_ids, txt, txt_ids, timesteps, y, txt_mask, img_mask, guidance)
+ cond_v, uncond_v = torch.split(model_out, len(model_out) // 2, dim=0)
+ cond_v = uncond_v + cfg_scale * (cond_v - uncond_v)
+ img = torch.cat([cond_v, uncond_v], dim=0)
+ return img
+
+    def get_fsdp_wrap_module_list(self) -> List[nn.Module]:
+        return (
+            list(self.double_blocks)
+            + list(self.single_blocks)
+            + [self.final_layer, self.img_in, self.vector_in, self.guidance_in, self.txt_in, self.time_in]
+        )
+
+    def get_checkpointing_wrap_module_list(self) -> List[nn.Module]:
+        return (
+            list(self.double_blocks)
+            + list(self.single_blocks)
+            + [self.final_layer, self.img_in, self.vector_in, self.guidance_in, self.txt_in, self.time_in]
+        )
+
+
+class FluxLoraWrapper(Flux):
+ def __init__(
+ self,
+ lora_rank: int = 128,
+ lora_scale: float = 1.0,
+ *args,
+ **kwargs,
+ ) -> None:
+ super().__init__(*args, **kwargs)
+
+ self.lora_rank = lora_rank
+
+ replace_linear_with_lora(
+ self,
+ max_rank=lora_rank,
+ scale=lora_scale,
+ )
+
+ def set_lora_scale(self, scale: float) -> None:
+ for module in self.modules():
+ if isinstance(module, LinearLora):
+ module.set_scale(scale=scale)
diff --git a/flux/modules/autoencoder.py b/flux/modules/autoencoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..75159f711f65f064107a1a1b9be6f09fc9872028
--- /dev/null
+++ b/flux/modules/autoencoder.py
@@ -0,0 +1,312 @@
+from dataclasses import dataclass
+
+import torch
+from einops import rearrange
+from torch import Tensor, nn
+
+
+@dataclass
+class AutoEncoderParams:
+ resolution: int
+ in_channels: int
+ ch: int
+ out_ch: int
+ ch_mult: list[int]
+ num_res_blocks: int
+ z_channels: int
+ scale_factor: float
+ shift_factor: float
+
+
+def swish(x: Tensor) -> Tensor:
+ return x * torch.sigmoid(x)
+
+
+class AttnBlock(nn.Module):
+ def __init__(self, in_channels: int):
+ super().__init__()
+ self.in_channels = in_channels
+
+ self.norm = nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
+
+ self.q = nn.Conv2d(in_channels, in_channels, kernel_size=1)
+ self.k = nn.Conv2d(in_channels, in_channels, kernel_size=1)
+ self.v = nn.Conv2d(in_channels, in_channels, kernel_size=1)
+ self.proj_out = nn.Conv2d(in_channels, in_channels, kernel_size=1)
+
+ def attention(self, h_: Tensor) -> Tensor:
+ h_ = self.norm(h_)
+ q = self.q(h_)
+ k = self.k(h_)
+ v = self.v(h_)
+
+ b, c, h, w = q.shape
+ q = rearrange(q, "b c h w -> b 1 (h w) c").contiguous()
+ k = rearrange(k, "b c h w -> b 1 (h w) c").contiguous()
+ v = rearrange(v, "b c h w -> b 1 (h w) c").contiguous()
+ h_ = nn.functional.scaled_dot_product_attention(q, k, v)
+
+ return rearrange(h_, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b)
+
+ def forward(self, x: Tensor) -> Tensor:
+ return x + self.proj_out(self.attention(x))
+
+
+class ResnetBlock(nn.Module):
+    def __init__(self, in_channels: int, out_channels: int | None = None):
+ super().__init__()
+ self.in_channels = in_channels
+ out_channels = in_channels if out_channels is None else out_channels
+ self.out_channels = out_channels
+
+ self.norm1 = nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
+ self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
+ self.norm2 = nn.GroupNorm(num_groups=32, num_channels=out_channels, eps=1e-6, affine=True)
+ self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
+ if self.in_channels != self.out_channels:
+ self.nin_shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
+
+ def forward(self, x):
+ h = x
+ h = self.norm1(h)
+ h = swish(h)
+ h = self.conv1(h)
+
+ h = self.norm2(h)
+ h = swish(h)
+ h = self.conv2(h)
+
+ if self.in_channels != self.out_channels:
+ x = self.nin_shortcut(x)
+
+ return x + h
+
+
+class Downsample(nn.Module):
+ def __init__(self, in_channels: int):
+ super().__init__()
+ # no asymmetric padding in torch conv, must do it ourselves
+ self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)
+
+ def forward(self, x: Tensor):
+ pad = (0, 1, 0, 1)
+ x = nn.functional.pad(x, pad, mode="constant", value=0)
+ x = self.conv(x)
+ return x
+
+
+class Upsample(nn.Module):
+ def __init__(self, in_channels: int):
+ super().__init__()
+ self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
+
+ def forward(self, x: Tensor):
+ x = nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
+ x = self.conv(x)
+ return x
+
+
+class Encoder(nn.Module):
+ def __init__(
+ self,
+ resolution: int,
+ in_channels: int,
+ ch: int,
+ ch_mult: list[int],
+ num_res_blocks: int,
+ z_channels: int,
+ ):
+ super().__init__()
+ self.ch = ch
+ self.num_resolutions = len(ch_mult)
+ self.num_res_blocks = num_res_blocks
+ self.resolution = resolution
+ self.in_channels = in_channels
+ # downsampling
+ self.conv_in = nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1)
+
+ curr_res = resolution
+ in_ch_mult = (1,) + tuple(ch_mult)
+ self.in_ch_mult = in_ch_mult
+ self.down = nn.ModuleList()
+ block_in = self.ch
+ for i_level in range(self.num_resolutions):
+ block = nn.ModuleList()
+ attn = nn.ModuleList()
+ block_in = ch * in_ch_mult[i_level]
+ block_out = ch * ch_mult[i_level]
+ for _ in range(self.num_res_blocks):
+ block.append(ResnetBlock(in_channels=block_in, out_channels=block_out))
+ block_in = block_out
+ down = nn.Module()
+ down.block = block
+ down.attn = attn
+ if i_level != self.num_resolutions - 1:
+ down.downsample = Downsample(block_in)
+ curr_res = curr_res // 2
+ self.down.append(down)
+
+ # middle
+ self.mid = nn.Module()
+ self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in)
+ self.mid.attn_1 = AttnBlock(block_in)
+ self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in)
+
+ # end
+ self.norm_out = nn.GroupNorm(num_groups=32, num_channels=block_in, eps=1e-6, affine=True)
+ self.conv_out = nn.Conv2d(block_in, 2 * z_channels, kernel_size=3, stride=1, padding=1)
+
+ def forward(self, x: Tensor) -> Tensor:
+ # downsampling
+ hs = [self.conv_in(x)]
+ for i_level in range(self.num_resolutions):
+ for i_block in range(self.num_res_blocks):
+ h = self.down[i_level].block[i_block](hs[-1])
+ if len(self.down[i_level].attn) > 0:
+ h = self.down[i_level].attn[i_block](h)
+ hs.append(h)
+ if i_level != self.num_resolutions - 1:
+ hs.append(self.down[i_level].downsample(hs[-1]))
+
+ # middle
+ h = hs[-1]
+ h = self.mid.block_1(h)
+ h = self.mid.attn_1(h)
+ h = self.mid.block_2(h)
+ # end
+ h = self.norm_out(h)
+ h = swish(h)
+ h = self.conv_out(h)
+ return h
+
+
+class Decoder(nn.Module):
+ def __init__(
+ self,
+ ch: int,
+ out_ch: int,
+ ch_mult: list[int],
+ num_res_blocks: int,
+ in_channels: int,
+ resolution: int,
+ z_channels: int,
+ ):
+ super().__init__()
+ self.ch = ch
+ self.num_resolutions = len(ch_mult)
+ self.num_res_blocks = num_res_blocks
+ self.resolution = resolution
+ self.in_channels = in_channels
+ self.ffactor = 2 ** (self.num_resolutions - 1)
+
+ # compute in_ch_mult, block_in and curr_res at lowest res
+ block_in = ch * ch_mult[self.num_resolutions - 1]
+ curr_res = resolution // 2 ** (self.num_resolutions - 1)
+ self.z_shape = (1, z_channels, curr_res, curr_res)
+
+ # z to block_in
+ self.conv_in = nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1)
+
+ # middle
+ self.mid = nn.Module()
+ self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in)
+ self.mid.attn_1 = AttnBlock(block_in)
+ self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in)
+
+ # upsampling
+ self.up = nn.ModuleList()
+ for i_level in reversed(range(self.num_resolutions)):
+ block = nn.ModuleList()
+ attn = nn.ModuleList()
+ block_out = ch * ch_mult[i_level]
+ for _ in range(self.num_res_blocks + 1):
+ block.append(ResnetBlock(in_channels=block_in, out_channels=block_out))
+ block_in = block_out
+ up = nn.Module()
+ up.block = block
+ up.attn = attn
+ if i_level != 0:
+ up.upsample = Upsample(block_in)
+ curr_res = curr_res * 2
+ self.up.insert(0, up) # prepend to get consistent order
+
+ # end
+ self.norm_out = nn.GroupNorm(num_groups=32, num_channels=block_in, eps=1e-6, affine=True)
+ self.conv_out = nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1)
+
+ def forward(self, z: Tensor) -> Tensor:
+ # z to block_in
+ h = self.conv_in(z)
+
+ # middle
+ h = self.mid.block_1(h)
+ h = self.mid.attn_1(h)
+ h = self.mid.block_2(h)
+
+ # upsampling
+ for i_level in reversed(range(self.num_resolutions)):
+ for i_block in range(self.num_res_blocks + 1):
+ h = self.up[i_level].block[i_block](h)
+ if len(self.up[i_level].attn) > 0:
+ h = self.up[i_level].attn[i_block](h)
+ if i_level != 0:
+ h = self.up[i_level].upsample(h)
+
+ # end
+ h = self.norm_out(h)
+ h = swish(h)
+ h = self.conv_out(h)
+ return h
+
+
+class DiagonalGaussian(nn.Module):
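+    """Diagonal Gaussian posterior: splits the input into (mean, logvar) along
+    `chunk_dim` and, when `sample` is set, returns mean + std * noise."""
+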
+ def __init__(self, sample: bool = True, chunk_dim: int = 1):
+ super().__init__()
+ self.sample = sample
+ self.chunk_dim = chunk_dim
+
+ def forward(self, z: Tensor) -> Tensor:
+ mean, logvar = torch.chunk(z, 2, dim=self.chunk_dim)
+ if self.sample:
+ std = torch.exp(0.5 * logvar)
+ return mean + std * torch.randn_like(mean)
+ else:
+ return mean
+
+
+class AutoEncoder(nn.Module):
+ def __init__(self, params: AutoEncoderParams):
+ super().__init__()
+ self.encoder = Encoder(
+ resolution=params.resolution,
+ in_channels=params.in_channels,
+ ch=params.ch,
+ ch_mult=params.ch_mult,
+ num_res_blocks=params.num_res_blocks,
+ z_channels=params.z_channels,
+ )
+ self.decoder = Decoder(
+ resolution=params.resolution,
+ in_channels=params.in_channels,
+ ch=params.ch,
+ out_ch=params.out_ch,
+ ch_mult=params.ch_mult,
+ num_res_blocks=params.num_res_blocks,
+ z_channels=params.z_channels,
+ )
+ self.reg = DiagonalGaussian()
+
+ self.scale_factor = params.scale_factor
+ self.shift_factor = params.shift_factor
+
+ def encode(self, x: Tensor) -> Tensor:
+ z = self.reg(self.encoder(x))
+ z = self.scale_factor * (z - self.shift_factor)
+ return z
+
+ def decode(self, z: Tensor) -> Tensor:
+ z = z / self.scale_factor + self.shift_factor
+ return self.decoder(z)
+
+ def forward(self, x: Tensor) -> Tensor:
+ return self.decode(self.encode(x))
diff --git a/flux/modules/conditioner.py b/flux/modules/conditioner.py
new file mode 100644
index 0000000000000000000000000000000000000000..e60297e45813862ffdf03b79fd8fbe5b4a17029d
--- /dev/null
+++ b/flux/modules/conditioner.py
@@ -0,0 +1,37 @@
+from torch import Tensor, nn
+from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer
+
+
+class HFEmbedder(nn.Module):
+ def __init__(self, version: str, max_length: int, **hf_kwargs):
+ super().__init__()
+ self.is_clip = version.startswith("openai")
+ self.max_length = max_length
+ self.output_key = "pooler_output" if self.is_clip else "last_hidden_state"
+
+ if self.is_clip:
+ self.tokenizer: CLIPTokenizer = CLIPTokenizer.from_pretrained(version, max_length=max_length)
+ self.hf_module: CLIPTextModel = CLIPTextModel.from_pretrained(version, **hf_kwargs)
+ else:
+ self.tokenizer: T5Tokenizer = T5Tokenizer.from_pretrained(version, max_length=max_length)
+ self.hf_module: T5EncoderModel = T5EncoderModel.from_pretrained(version, **hf_kwargs)
+
+ self.hf_module = self.hf_module.eval().requires_grad_(False)
+
+ def forward(self, text: list[str]) -> Tensor:
+ batch_encoding = self.tokenizer(
+ text,
+ truncation=True,
+ max_length=self.max_length,
+ return_length=False,
+ return_overflowing_tokens=False,
+ padding="max_length",
+ return_tensors="pt",
+ )
+
+ outputs = self.hf_module(
+ input_ids=batch_encoding["input_ids"].to(self.hf_module.device),
+ attention_mask=None,
+ output_hidden_states=False,
+ )
+ return outputs[self.output_key]
diff --git a/flux/modules/image_embedders.py b/flux/modules/image_embedders.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7177d2f3cb33f61c6d1b3423c5b2b56dc36ab04
--- /dev/null
+++ b/flux/modules/image_embedders.py
@@ -0,0 +1,103 @@
+import os
+
+import cv2
+import numpy as np
+import torch
+from einops import rearrange, repeat
+from PIL import Image
+from safetensors.torch import load_file as load_sft
+from torch import nn
+from transformers import AutoModelForDepthEstimation, AutoProcessor, SiglipImageProcessor, SiglipVisionModel
+
+from flux.util import print_load_warning
+
+
+class DepthImageEncoder:
+ depth_model_name = "LiheYoung/depth-anything-large-hf"
+
+ def __init__(self, device):
+ self.device = device
+ self.depth_model = AutoModelForDepthEstimation.from_pretrained(self.depth_model_name).to(device)
+ self.processor = AutoProcessor.from_pretrained(self.depth_model_name)
+
+ def __call__(self, img: torch.Tensor) -> torch.Tensor:
+ hw = img.shape[-2:]
+
+ img = torch.clamp(img, -1.0, 1.0)
+ img_byte = ((img + 1.0) * 127.5).byte()
+
+ img = self.processor(img_byte, return_tensors="pt")["pixel_values"]
+ depth = self.depth_model(img.to(self.device)).predicted_depth
+ depth = repeat(depth, "b h w -> b 3 h w")
+ depth = torch.nn.functional.interpolate(depth, hw, mode="bicubic", antialias=True)
+
+ depth = depth / 127.5 - 1.0
+ return depth
+
+
+class CannyImageEncoder:
+ def __init__(
+ self,
+ device,
+ min_t: int = 50,
+ max_t: int = 200,
+ ):
+ self.device = device
+ self.min_t = min_t
+ self.max_t = max_t
+
+ def __call__(self, img: torch.Tensor) -> torch.Tensor:
+ assert img.shape[0] == 1, "Only batch size 1 is supported"
+
+ img = rearrange(img[0], "c h w -> h w c")
+ img = torch.clamp(img, -1.0, 1.0)
+ img_np = ((img + 1.0) * 127.5).numpy().astype(np.uint8)
+
+ # Apply Canny edge detection
+ canny = cv2.Canny(img_np, self.min_t, self.max_t)
+
+ # Convert back to torch tensor and reshape
+ canny = torch.from_numpy(canny).float() / 127.5 - 1.0
+ canny = rearrange(canny, "h w -> 1 1 h w")
+ canny = repeat(canny, "b 1 ... -> b 3 ...")
+ return canny.to(self.device)
+
+
+class ReduxImageEncoder(nn.Module):
+ siglip_model_name = "google/siglip-so400m-patch14-384"
+
+ def __init__(
+ self,
+ device,
+ redux_dim: int = 1152,
+ txt_in_features: int = 4096,
+ redux_path: str | None = os.getenv("FLUX_REDUX"),
+ dtype=torch.bfloat16,
+ ) -> None:
+ assert redux_path is not None, "Redux path must be provided"
+
+ super().__init__()
+
+ self.redux_dim = redux_dim
+ self.device = device if isinstance(device, torch.device) else torch.device(device)
+ self.dtype = dtype
+
+ with self.device:
+ self.redux_up = nn.Linear(redux_dim, txt_in_features * 3, dtype=dtype)
+ self.redux_down = nn.Linear(txt_in_features * 3, txt_in_features, dtype=dtype)
+
+ sd = load_sft(redux_path, device=str(device))
+ missing, unexpected = self.load_state_dict(sd, strict=False, assign=True)
+ print_load_warning(missing, unexpected)
+
+ self.siglip = SiglipVisionModel.from_pretrained(self.siglip_model_name).to(dtype=dtype)
+ self.normalize = SiglipImageProcessor.from_pretrained(self.siglip_model_name)
+
+ def __call__(self, x: Image.Image) -> torch.Tensor:
+ imgs = self.normalize.preprocess(images=[x], do_resize=True, return_tensors="pt", do_convert_rgb=True)
+
+ _encoded_x = self.siglip(**imgs.to(device=self.device, dtype=self.dtype)).last_hidden_state
+
+ projected_x = self.redux_down(nn.functional.silu(self.redux_up(_encoded_x)))
+
+ return projected_x
diff --git a/flux/modules/layers.py b/flux/modules/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1533887a01377d3e2c9d7f4e0588c1ce56693c3
--- /dev/null
+++ b/flux/modules/layers.py
@@ -0,0 +1,259 @@
+import math
+from dataclasses import dataclass
+
+import torch
+from einops import rearrange
+from torch import Tensor, nn
+
+from flux.math import attention, rope
+
+
+class EmbedND(nn.Module):
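+    """Multi-axis rotary position embedding: each id axis i gets its own RoPE
+    table of size axes_dim[i], concatenated along the frequency dimension."""
+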
+ def __init__(self, dim: int, theta: int, axes_dim: list[int]):
+ super().__init__()
+ self.dim = dim
+ self.theta = theta
+ self.axes_dim = axes_dim
+
+ def forward(self, ids: Tensor) -> Tensor:
+ n_axes = ids.shape[-1]
+ emb = torch.cat(
+ [rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(n_axes)],
+ dim=-3,
+ )
+
+ return emb.unsqueeze(1)
+
+
+def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0):
+ """
+ Create sinusoidal timestep embeddings.
+ :param t: a 1-D Tensor of N indices, one per batch element.
+ These may be fractional.
+ :param dim: the dimension of the output.
+    :param max_period: controls the minimum frequency of the embeddings.
+    :param time_factor: scale applied to t before embedding (t is typically in [0, 1]).
+    :return: an (N, D) Tensor of positional embeddings.
+ """
+ t = time_factor * t
+ half = dim // 2
+ freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to(
+ t.device
+ )
+
+ args = t[:, None].float() * freqs[None]
+ embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
+ if dim % 2:
+ embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
+ if torch.is_floating_point(t):
+ embedding = embedding.to(t)
+ return embedding
+
+
+class MLPEmbedder(nn.Module):
+ def __init__(self, in_dim: int, hidden_dim: int):
+ super().__init__()
+ self.in_layer = nn.Linear(in_dim, hidden_dim, bias=True)
+ self.silu = nn.SiLU()
+ self.out_layer = nn.Linear(hidden_dim, hidden_dim, bias=True)
+
+ def forward(self, x: Tensor) -> Tensor:
+ return self.out_layer(self.silu(self.in_layer(x)))
+
+
+class RMSNorm(torch.nn.Module):
+ def __init__(self, dim: int):
+ super().__init__()
+ self.scale = nn.Parameter(torch.ones(dim))
+
+ def forward(self, x: Tensor):
+ x_dtype = x.dtype
+ x = x.float()
+ rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + 1e-6)
+ return (x * rrms).to(dtype=x_dtype) * self.scale
+
+
+class QKNorm(torch.nn.Module):
+ def __init__(self, dim: int):
+ super().__init__()
+ self.query_norm = RMSNorm(dim)
+ self.key_norm = RMSNorm(dim)
+
+ def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple[Tensor, Tensor]:
+ q = self.query_norm(q)
+ k = self.key_norm(k)
+ return q.to(v), k.to(v)
+
+
+class SelfAttention(nn.Module):
+ def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+ self.norm = QKNorm(head_dim)
+ self.proj = nn.Linear(dim, dim)
+
+ def forward(self, x: Tensor, pe: Tensor) -> Tensor:
+ qkv = self.qkv(x)
+ q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+ q, k = self.norm(q, k, v)
+ x = attention(q, k, v, pe=pe)
+ x = self.proj(x)
+ return x
+
+
+@dataclass
+class ModulationOut:
+ shift: Tensor
+ scale: Tensor
+ gate: Tensor
+
+
+class Modulation(nn.Module):
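+    """Projects the conditioning vector through SiLU + Linear into adaLN
+    (shift, scale, gate) triples; `double=True` yields separate triples for
+    the attention and MLP branches."""
+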
+ def __init__(self, dim: int, double: bool):
+ super().__init__()
+ self.is_double = double
+ self.multiplier = 6 if double else 3
+ self.lin = nn.Linear(dim, self.multiplier * dim, bias=True)
+
+ def forward(self, vec: Tensor) -> tuple[ModulationOut, ModulationOut | None]:
+ out = self.lin(nn.functional.silu(vec))[:, None, :].chunk(self.multiplier, dim=-1)
+
+ return (
+ ModulationOut(*out[:3]),
+ ModulationOut(*out[3:]) if self.is_double else None,
+ )
+
+
+class DoubleStreamBlock(nn.Module):
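+    """MM-DiT-style block: image and text tokens keep separate weights and
+    modulation but attend jointly over the concatenated token sequence."""
+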
+ def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False):
+ super().__init__()
+
+ mlp_hidden_dim = int(hidden_size * mlp_ratio)
+ self.num_heads = num_heads
+ self.hidden_size = hidden_size
+ self.img_mod = Modulation(hidden_size, double=True)
+ self.img_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+ self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
+
+ self.img_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+ self.img_mlp = nn.Sequential(
+ nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
+ nn.GELU(approximate="tanh"),
+ nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
+ )
+
+ self.txt_mod = Modulation(hidden_size, double=True)
+ self.txt_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+ self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
+
+ self.txt_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+ self.txt_mlp = nn.Sequential(
+ nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
+ nn.GELU(approximate="tanh"),
+ nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
+ )
+
+    def forward(
+        self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, img_mask: Tensor | None, txt_mask: Tensor | None
+    ) -> tuple[Tensor, Tensor]:
+ img_mod1, img_mod2 = self.img_mod(vec)
+ txt_mod1, txt_mod2 = self.txt_mod(vec)
+
+ # prepare image for attention
+ img_modulated = self.img_norm1(img)
+ img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
+ img_qkv = self.img_attn.qkv(img_modulated)
+ img_q, img_k, img_v = rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+ img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
+
+ # prepare txt for attention
+ txt_modulated = self.txt_norm1(txt)
+ txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
+ txt_qkv = self.txt_attn.qkv(txt_modulated)
+ txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+ txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
+
+ # run actual attention
+ q = torch.cat((txt_q, img_q), dim=2)
+ k = torch.cat((txt_k, img_k), dim=2)
+ v = torch.cat((txt_v, img_v), dim=2)
+
+        attn_mask = torch.cat((txt_mask, img_mask), dim=1) if img_mask is not None else None
+
+        with torch.cuda.device(q.device.index):
+            attn = attention(q, k, v, pe=pe, attn_mask=attn_mask)
+ txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :]
+
+        # calculate the img blocks
+ img = img + img_mod1.gate * self.img_attn.proj(img_attn)
+ img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
+
+        # calculate the txt blocks
+ txt = txt + txt_mod1.gate * self.txt_attn.proj(txt_attn)
+ txt = txt + txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)
+ return img, txt
+
+
+class SingleStreamBlock(nn.Module):
+ """
+ A DiT block with parallel linear layers as described in
+ https://arxiv.org/abs/2302.05442 and adapted modulation interface.
+ """
+
+ def __init__(
+ self,
+ hidden_size: int,
+ num_heads: int,
+ mlp_ratio: float = 4.0,
+ qk_scale: float | None = None,
+ ):
+ super().__init__()
+ self.hidden_dim = hidden_size
+ self.num_heads = num_heads
+ head_dim = hidden_size // num_heads
+ self.scale = qk_scale or head_dim**-0.5
+
+ self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
+ # qkv and mlp_in
+ self.linear1 = nn.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim)
+ # proj and mlp_out
+ self.linear2 = nn.Linear(hidden_size + self.mlp_hidden_dim, hidden_size)
+
+ self.norm = QKNorm(head_dim)
+
+ self.hidden_size = hidden_size
+ self.pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+
+ self.mlp_act = nn.GELU(approximate="tanh")
+ self.modulation = Modulation(hidden_size, double=False)
+
+    def forward(self, x: Tensor, vec: Tensor, pe: Tensor, attn_mask: Tensor | None = None) -> Tensor:
+ mod, _ = self.modulation(vec)
+ x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift
+ qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
+
+ q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+ q, k = self.norm(q, k, v)
+
+ # compute attention
+ with torch.cuda.device(q.device.index):
+ attn = attention(q, k, v, pe=pe, attn_mask=attn_mask)
+ # compute activation in mlp stream, cat again and run second linear layer
+ output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
+ return x + mod.gate * output
+
+
+class LastLayer(nn.Module):
+ def __init__(self, hidden_size: int, patch_size: int, out_channels: int):
+ super().__init__()
+ self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+ self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True)
+ self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True))
+
+ def forward(self, x: Tensor, vec: Tensor) -> Tensor:
+ shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
+ x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
+ x = self.linear(x)
+ return x
diff --git a/flux/modules/lora.py b/flux/modules/lora.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b7f0baf52486f848c30a6dfc74e44b24a0f0961
--- /dev/null
+++ b/flux/modules/lora.py
@@ -0,0 +1,199 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+def replace_linear_with_lora(
+ module: nn.Module,
+ max_rank: int,
+ scale: float = 1.0,
+) -> None:
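+    """Recursively swap every nn.Linear in `module` for a LinearLora that reuses
+    the original weight and bias; since the LoRA B matrices are zero-initialized,
+    the wrapped model is initially equivalent to the base model."""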
+ for name, child in module.named_children():
+ if isinstance(child, nn.Linear):
+ new_lora = LinearLora(
+ in_features=child.in_features,
+ out_features=child.out_features,
+ bias=child.bias,
+ rank=max_rank,
+ scale=scale,
+ dtype=child.weight.dtype,
+ device=child.weight.device,
+ )
+
+ new_lora.weight = child.weight
+ new_lora.bias = child.bias if child.bias is not None else None
+
+ setattr(module, name, new_lora)
+ else:
+ replace_linear_with_lora(
+ module=child,
+ max_rank=max_rank,
+ scale=scale,
+ )
+
+
+class LinearLora(nn.Linear):
+ def __init__(
+ self,
+ in_features: int,
+ out_features: int,
+        bias: bool | torch.Tensor | None,
+ rank: int,
+ dtype: torch.dtype,
+ device: torch.device,
+ lora_bias: bool = True,
+ scale: float = 1.0,
+ *args,
+ **kwargs,
+ ) -> None:
+ super().__init__(
+ in_features=in_features,
+ out_features=out_features,
+ bias=bias is not None,
+ device=device,
+ dtype=dtype,
+ *args,
+ **kwargs,
+ )
+
+ assert isinstance(scale, float), "scale must be a float"
+
+ self.scale = scale
+ self.rank = rank
+ self.lora_bias = lora_bias
+ self.dtype = dtype
+ self.device = device
+
+ if rank > (new_rank := min(self.out_features, self.in_features)):
+ self.rank = new_rank
+
+ self.lora_A = nn.Linear(
+ in_features=in_features,
+ out_features=self.rank,
+ bias=False,
+ dtype=dtype,
+ device=device,
+ )
+ self.lora_B = nn.Linear(
+ in_features=self.rank,
+ out_features=out_features,
+ bias=self.lora_bias,
+ dtype=dtype,
+ device=device,
+ )
+
+ nn.init.zeros_(self.lora_B.weight)
+ if self.lora_B.bias is not None:
+ nn.init.zeros_(self.lora_B.bias)
+
+ def set_scale(self, scale: float) -> None:
+ assert isinstance(scale, float), "scalar value must be a float"
+ self.scale = scale
+
+ def forward(self, input: torch.Tensor) -> torch.Tensor:
+ base_out = super().forward(input)
+
+ _lora_out_B = self.lora_B(self.lora_A(input))
+ lora_update = _lora_out_B * self.scale
+
+ return base_out + lora_update
+
+
+class MixtureOfLoRAExperts(nn.Linear):
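+    """Linear layer with a shared LoRA plus a sparsely gated mixture of expert
+    LoRAs: a gate scores the experts per input, the top-k are selected and
+    renormalized, and their low-rank updates are added to the base output."""
+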
+ def __init__(
+ self,
+ in_features: int,
+ out_features: int,
+ num_experts: int,
+ rank: int,
+ bias: bool = True,
+        dtype: torch.dtype | None = None,
+        device: torch.device | None = None,
+        scale: float = 1.0,
+        top_k: int = 2,  # number of experts each input is routed to
+ ) -> None:
+ super().__init__(
+ in_features=in_features,
+ out_features=out_features,
+ bias=bias,
+ device=device,
+ dtype=dtype,
+ )
+
+ self.num_experts = num_experts
+ self.rank = min(rank, min(in_features, out_features))
+ self.scale = scale
+ self.top_k = min(top_k, num_experts)
+
+        # shared LoRA branch applied to all inputs
+        self.shared_lora_A = nn.Linear(in_features, self.rank, bias=False, dtype=dtype, device=device)
+        self.shared_lora_B = nn.Linear(self.rank, out_features, bias=False, dtype=dtype, device=device)
+
+        # per-expert LoRA branches
+        self.expert_lora_A = nn.ModuleList([
+            nn.Linear(in_features, self.rank, bias=False, dtype=dtype, device=device)
+            for _ in range(num_experts)
+        ])
+        self.expert_lora_B = nn.ModuleList([
+            nn.Linear(self.rank, out_features, bias=False, dtype=dtype, device=device)
+            for _ in range(num_experts)
+        ])
+
+        # gating network that scores the experts per input
+        self.gate = nn.Linear(in_features, num_experts, dtype=dtype, device=device)
+
+        # initialization
+        self._init_weights()
+
+    def _init_weights(self) -> None:
+        # zero-init the shared LoRA B matrix so the branch starts as a no-op
+        nn.init.zeros_(self.shared_lora_B.weight)
+
+        # zero-init the expert LoRA B matrices likewise
+        for expert_B in self.expert_lora_B:
+            nn.init.zeros_(expert_B.weight)
+
+        # near-uniform gate at initialization
+        nn.init.zeros_(self.gate.bias)
+        nn.init.normal_(self.gate.weight, std=0.01)
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        batch_size = input.shape[0]
+
+        # base output (identical to the original Linear layer)
+        base_out = super().forward(input)
+
+        # shared LoRA branch applied to every sample
+        shared_lora = self.shared_lora_B(self.shared_lora_A(input))
+
+        # compute gating weights over the experts
+        gate_logits = self.gate(input)
+        gate_weights = F.softmax(gate_logits, dim=-1)
+
+        # select the top-k experts and renormalize their weights
+        top_k_weights, top_k_indices = torch.topk(gate_weights, self.top_k, dim=-1)
+        top_k_weights = top_k_weights / top_k_weights.sum(dim=-1, keepdim=True)
+
+        # accumulate the selected experts' low-rank updates
+        expert_outputs = torch.zeros_like(base_out)
+        for k in range(self.top_k):
+            # expert index chosen for each sample at rank k
+            expert_idx = top_k_indices[:, k]
+            expert_weight = top_k_weights[:, k].unsqueeze(-1)
+
+            # evaluate the selected expert separately for each sample
+            for i in range(batch_size):
+                idx = expert_idx[i]
+                expert_out = self.expert_lora_B[idx](self.expert_lora_A[idx](input[i:i+1]))
+                expert_outputs[i:i+1] += expert_out * expert_weight[i]
+
+        # combine base, shared, and expert outputs
+        final_output = (
+            base_out +
+            self.scale * (shared_lora + expert_outputs)
+        )
+
+        return final_output
+
+ def set_scale(self, scale: float) -> None:
+ assert isinstance(scale, float), "scale must be a float"
+ self.scale = scale
\ No newline at end of file
diff --git a/flux/sampling.py b/flux/sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..615bb194c5ed80f6064e6146dd47943c7a4c90a3
--- /dev/null
+++ b/flux/sampling.py
@@ -0,0 +1,372 @@
+import math
+import random
+from typing import Callable
+
+import numpy as np
+import torch
+from einops import rearrange, repeat
+from PIL import Image
+from torch import Tensor
+import torch.nn.functional as F
+
+from .model import Flux
+from .modules.autoencoder import AutoEncoder
+from .modules.conditioner import HFEmbedder
+from .modules.image_embedders import CannyImageEncoder, DepthImageEncoder, ReduxImageEncoder
+
+
+def get_noise(
+ num_samples: int,
+ height: int,
+ width: int,
+ device: torch.device,
+ dtype: torch.dtype,
+ seed: int,
+):
+ return torch.randn(
+ num_samples,
+ 16,
+ # allow for packing
+ 2 * math.ceil(height / 16),
+ 2 * math.ceil(width / 16),
+ device=device,
+ dtype=dtype,
+ generator=torch.Generator(device=device).manual_seed(seed),
+ )
+
+def prepare_modified(
+    t5: HFEmbedder,
+    clip: HFEmbedder,
+    img: list[list[torch.Tensor]],
+    prompt: str | list[str],
+    proportion_empty_prompts: float = 0.1,
+    is_train: bool = True,
+    text_emb: list[dict[str, Tensor]] | None = None,
+) -> dict[str, Tensor]:
+    assert isinstance(img, list) and all(isinstance(sub, list) for sub in img)
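+    # Each sample is a list of latent sub-images (e.g. the panels of an
+    # in-context grid). Every sub-image is packed into 2x2-patch tokens whose
+    # position ids use axis 0 for the sub-image index; the flattened sequences
+    # are padded to the batch max length and img_mask marks the valid tokens.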
+ bs = len(img)
+ if isinstance(img[0], torch.Tensor):
+ max_len = max([i.shape[-2] * i.shape[-1] for i in img]) // 4
+ img_mask = torch.zeros(bs, max_len, device=img[0].device, dtype=torch.int32)
+ else:
+ max_len = max([sum([i.shape[-2] * i.shape[-1] for i in sub_image]) for sub_image in img]) // 4
+ img_mask = torch.zeros(bs, max_len, device=img[0][0].device, dtype=torch.int32)
+ # pad img to same length for batch processing
+ padded_img = []
+ padded_img_ids = []
+ for i in range(bs):
+ img_i = img[i]
+ flat_img_list = []
+ flat_img_ids_list = []
+ for j in range(len(img_i)):
+ img_i_j = img_i[j].squeeze(0)
+ c, h, w = img_i_j.shape
+ img_ids = torch.zeros(h // 2, w // 2, 3)
+ img_ids[..., 0] = j + 1
+ img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2)[:, None]
+ img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2)[None, :]
+
+ flat_img_ids = rearrange(img_ids, "h w c -> (h w) c")
+ flat_img = rearrange(img_i_j, "c (h ph) (w pw) -> (h w) (c ph pw)", ph=2, pw=2)
+ flat_img_list.append(flat_img)
+ flat_img_ids_list.append(flat_img_ids)
+
+ flat_img = torch.cat(flat_img_list, dim=0)
+ flat_img_ids = torch.cat(flat_img_ids_list, dim=0)
+ padded_img.append(F.pad(flat_img, (0, 0, 0, max_len - flat_img.shape[0])))
+ padded_img_ids.append(F.pad(flat_img_ids, (0, 0, 0, max_len - flat_img_ids.shape[0])))
+ img_mask[i, :flat_img.shape[0]] = 1
+
+ img = torch.stack(padded_img, dim=0)
+ img_ids = torch.stack(padded_img_ids, dim=0)
+
+ if isinstance(prompt, str):
+ prompt = [prompt]
+
+ bs = len(prompt)
+ drop_mask = []
+ for idx in range(bs):
+ if random.random() < proportion_empty_prompts:
+ prompt[idx] = ""
+        elif isinstance(prompt[idx], list):
+ prompt[idx] = random.choice(prompt[idx]) if is_train else prompt[idx][0]
+ if prompt[idx] == "":
+ drop_mask.append(0)
+ else:
+ drop_mask.append(1)
+ drop_mask = torch.tensor(drop_mask, device=img_mask.device, dtype=img_mask.dtype)
+
+ if t5 is None:
+ txt = torch.stack([item["txt"] for item in text_emb], dim=0).to(img.device)
+ else:
+ txt = t5(prompt)
+ if txt.shape[0] == 1 and bs > 1:
+ txt = repeat(txt, "1 ... -> bs ...", bs=bs)
+ txt_ids = torch.zeros(bs, txt.shape[1], 3)
+ txt_mask = torch.ones(bs, txt.shape[1], device=txt.device, dtype=torch.int32)
+
+ if clip is None:
+ vec = torch.stack([item["vec"] for item in text_emb], dim=0).to(img.device)
+ else:
+ vec = clip(prompt)
+ if vec.shape[0] == 1 and bs > 1:
+ vec = repeat(vec, "1 ... -> bs ...", bs=bs)
+
+ out_dict = {
+ "img": img,
+ "img_ids": img_ids.to(img.device),
+ "txt": txt.to(img.device),
+ "txt_ids": txt_ids.to(img.device),
+ "vec": vec.to(img.device),
+ "img_mask": img_mask.to(img.device),
+ "txt_mask": txt_mask.to(txt.device),
+ "drop_mask": drop_mask.to(img.device),
+ }
+
+ return out_dict
+
+
+# ############################# Original Prepare Function #############################
+
+def prepare(
+    t5: HFEmbedder,
+    clip: HFEmbedder,
+    img: Tensor,
+    prompt: str | list[str],
+) -> dict[str, Tensor]:
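+    # Pack the latent into 2x2 patches, (b, c, h, w) -> (b, h/2 * w/2, c * 4),
+    # with 3-axis position ids (axis 0 unused here, axis 1 = row, axis 2 = column).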
+ bs, c, h, w = img.shape
+ if bs == 1 and not isinstance(prompt, str):
+ bs = len(prompt)
+
+ img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+ if img.shape[0] == 1 and bs > 1:
+ img = repeat(img, "1 ... -> bs ...", bs=bs)
+
+ img_ids = torch.zeros(h // 2, w // 2, 3)
+ img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2)[:, None]
+ img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2)[None, :]
+ img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
+
+ if isinstance(prompt, str):
+ prompt = [prompt]
+ txt = t5(prompt)
+ if txt.shape[0] == 1 and bs > 1:
+ txt = repeat(txt, "1 ... -> bs ...", bs=bs)
+ txt_ids = torch.zeros(bs, txt.shape[1], 3)
+
+ vec = clip(prompt)
+ if vec.shape[0] == 1 and bs > 1:
+ vec = repeat(vec, "1 ... -> bs ...", bs=bs)
+
+ return {
+ "img": img,
+ "img_ids": img_ids.to(img.device),
+ "txt": txt.to(img.device),
+ "txt_ids": txt_ids.to(img.device),
+ "vec": vec.to(img.device),
+ }
+
+def prepare_control(
+ t5: HFEmbedder,
+ clip: HFEmbedder,
+ img: Tensor,
+ prompt: str | list[str],
+ ae: AutoEncoder,
+ encoder: DepthImageEncoder | CannyImageEncoder,
+ img_cond_path: str,
+) -> dict[str, Tensor]:
+ # load and encode the conditioning image
+ bs, _, h, w = img.shape
+ if bs == 1 and not isinstance(prompt, str):
+ bs = len(prompt)
+
+ img_cond = Image.open(img_cond_path).convert("RGB")
+
+ width = w * 8
+ height = h * 8
+ img_cond = img_cond.resize((width, height), Image.LANCZOS)
+ img_cond = np.array(img_cond)
+ img_cond = torch.from_numpy(img_cond).float() / 127.5 - 1.0
+ img_cond = rearrange(img_cond, "h w c -> 1 c h w")
+
+ with torch.no_grad():
+ img_cond = encoder(img_cond)
+ img_cond = ae.encode(img_cond)
+
+ img_cond = img_cond.to(torch.bfloat16)
+ img_cond = rearrange(img_cond, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+ if img_cond.shape[0] == 1 and bs > 1:
+ img_cond = repeat(img_cond, "1 ... -> bs ...", bs=bs)
+
+ return_dict = prepare(t5, clip, img, prompt)
+ return_dict["img_cond"] = img_cond
+ return return_dict
+
+
+def prepare_fill(
+ t5: HFEmbedder,
+ clip: HFEmbedder,
+ img: Tensor,
+ prompt: str | list[str],
+ ae: AutoEncoder,
+ img_cond_path: str,
+ mask_path: str,
+) -> dict[str, Tensor]:
+ # load and encode the conditioning image and the mask
+ bs, _, _, _ = img.shape
+ if bs == 1 and not isinstance(prompt, str):
+ bs = len(prompt)
+
+ img_cond = Image.open(img_cond_path).convert("RGB")
+ img_cond = np.array(img_cond)
+ img_cond = torch.from_numpy(img_cond).float() / 127.5 - 1.0
+ img_cond = rearrange(img_cond, "h w c -> 1 c h w")
+
+ mask = Image.open(mask_path).convert("L")
+ mask = np.array(mask)
+ mask = torch.from_numpy(mask).float() / 255.0
+ mask = rearrange(mask, "h w -> 1 1 h w")
+
+ with torch.no_grad():
+ img_cond = img_cond.to(img.device)
+ mask = mask.to(img.device)
+ img_cond = img_cond * (1 - mask)
+ img_cond = ae.encode(img_cond)
+ mask = mask[:, 0, :, :]
+ mask = mask.to(torch.bfloat16)
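+        # fold each 8x8 pixel block of the mask into channels so it matches the
+        # 8x-downsampled latent grid, then pack 2x2 like the image tokens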
+ mask = rearrange(
+ mask,
+ "b (h ph) (w pw) -> b (ph pw) h w",
+ ph=8,
+ pw=8,
+ )
+ mask = rearrange(mask, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+ if mask.shape[0] == 1 and bs > 1:
+ mask = repeat(mask, "1 ... -> bs ...", bs=bs)
+
+ img_cond = img_cond.to(torch.bfloat16)
+ img_cond = rearrange(img_cond, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+ if img_cond.shape[0] == 1 and bs > 1:
+ img_cond = repeat(img_cond, "1 ... -> bs ...", bs=bs)
+
+ img_cond = torch.cat((img_cond, mask), dim=-1)
+
+ return_dict = prepare(t5, clip, img, prompt)
+ return_dict["img_cond"] = img_cond.to(img.device)
+ return return_dict
+
+
+def prepare_redux(
+ t5: HFEmbedder,
+ clip: HFEmbedder,
+ img: Tensor,
+ prompt: str | list[str],
+ encoder: ReduxImageEncoder,
+ img_cond_path: str,
+) -> dict[str, Tensor]:
+ bs, _, h, w = img.shape
+ if bs == 1 and not isinstance(prompt, str):
+ bs = len(prompt)
+
+ img_cond = Image.open(img_cond_path).convert("RGB")
+ with torch.no_grad():
+ img_cond = encoder(img_cond)
+
+ img_cond = img_cond.to(torch.bfloat16)
+ if img_cond.shape[0] == 1 and bs > 1:
+ img_cond = repeat(img_cond, "1 ... -> bs ...", bs=bs)
+
+ img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+ if img.shape[0] == 1 and bs > 1:
+ img = repeat(img, "1 ... -> bs ...", bs=bs)
+
+ img_ids = torch.zeros(h // 2, w // 2, 3)
+ img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2)[:, None]
+ img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2)[None, :]
+ img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
+
+ if isinstance(prompt, str):
+ prompt = [prompt]
+ txt = t5(prompt)
+ txt = torch.cat((txt, img_cond.to(txt)), dim=-2)
+ if txt.shape[0] == 1 and bs > 1:
+ txt = repeat(txt, "1 ... -> bs ...", bs=bs)
+ txt_ids = torch.zeros(bs, txt.shape[1], 3)
+
+ vec = clip(prompt)
+ if vec.shape[0] == 1 and bs > 1:
+ vec = repeat(vec, "1 ... -> bs ...", bs=bs)
+
+ return {
+ "img": img,
+ "img_ids": img_ids.to(img.device),
+ "txt": txt.to(img.device),
+ "txt_ids": txt_ids.to(img.device),
+ "vec": vec.to(img.device),
+ }
+
+
+def time_shift(mu: float, sigma: float, t: Tensor):
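+    # Logistic reparameterization of the schedule: t' = e^mu / (e^mu + (1/t - 1)^sigma),
+    # which spends more steps at high noise as mu grows.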
+ return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
+
+
+def get_lin_function(
+ x1: float = 256, y1: float = 0.5, x2: float = 4096, y2: float = 1.15
+) -> Callable[[float], float]:
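+    # Linearly interpolate the shift parameter mu as a function of image token
+    # sequence length, anchored at (x1, y1) = (256, 0.5) and (x2, y2) = (4096, 1.15).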
+ m = (y2 - y1) / (x2 - x1)
+ b = y1 - m * x1
+ return lambda x: m * x + b
+
+
+def get_schedule(
+ num_steps: int,
+ image_seq_len: int,
+ base_shift: float = 0.5,
+ max_shift: float = 1.15,
+ shift: bool = True,
+) -> list[float]:
+ # extra step for zero
+ timesteps = torch.linspace(1, 0, num_steps + 1)
+
+ # shifting the schedule to favor high timesteps for higher signal images
+ if shift:
+ # estimate mu based on linear estimation between two points
+ mu = get_lin_function(y1=base_shift, y2=max_shift)(image_seq_len)
+ timesteps = time_shift(mu, 1.0, timesteps)
+
+ return timesteps.tolist()
+
+
+def denoise(
+ model: Flux,
+ # model input
+ img: Tensor,
+ img_ids: Tensor,
+ txt: Tensor,
+ txt_ids: Tensor,
+ vec: Tensor,
+ # sampling parameters
+ timesteps: list[float],
+ guidance: float = 4.0,
+ # extra img tokens
+ img_cond: Tensor | None = None,
+):
+ # this is ignored for schnell
+ guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)
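+    # Explicit Euler integration of the learned velocity field: one model
+    # evaluation per interval, stepping from t_curr to t_prev.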
+ for t_curr, t_prev in zip(timesteps[:-1], timesteps[1:]):
+ t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
+ pred = model(
+ img=torch.cat((img, img_cond), dim=-1) if img_cond is not None else img,
+ img_ids=img_ids,
+ txt=txt,
+ txt_ids=txt_ids,
+ y=vec,
+ timesteps=t_vec,
+ guidance=guidance_vec,
+ )
+
+ img = img + (t_prev - t_curr) * pred
+
+ return img
+
+
+def unpack(x: Tensor, height: int, width: int) -> Tensor:
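+    # Inverse of the 2x2 patch packing: tokens of dimension c * 2 * 2 are
+    # reshaped back into a (b, c, h, w) latent on the /8 VAE, /2 patch grid.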
+ return rearrange(
+ x,
+ "b (h w) (c ph pw) -> b c (h ph) (w pw)",
+ h=math.ceil(height / 16),
+ w=math.ceil(width / 16),
+ ph=2,
+ pw=2,
+ )
diff --git a/flux/util.py b/flux/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..de10e65c037b7ce8115a9ed1842f3603037b6ae4
--- /dev/null
+++ b/flux/util.py
@@ -0,0 +1,516 @@
+import os
+from dataclasses import dataclass
+
+import torch
+from einops import rearrange
+from huggingface_hub import hf_hub_download
+from imwatermark import WatermarkEncoder
+from PIL import ExifTags, Image
+from safetensors.torch import load_file as load_sft
+
+from flux.model import Flux, FluxLoraWrapper, FluxParams
+from flux.modules.autoencoder import AutoEncoder, AutoEncoderParams
+from flux.modules.conditioner import HFEmbedder
+
+
+def save_image(
+ nsfw_classifier,
+ name: str,
+ output_name: str,
+ idx: int,
+ x: torch.Tensor,
+ add_sampling_metadata: bool,
+ prompt: str,
+ nsfw_threshold: float = 0.85,
+) -> int:
+ fn = output_name.format(idx=idx)
+ print(f"Saving {fn}")
+ # bring into PIL format and save
+ x = x.clamp(-1, 1)
+ x = embed_watermark(x.float())
+ x = rearrange(x[0], "c h w -> h w c")
+
+ img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())
+    nsfw_score = [res["score"] for res in nsfw_classifier(img) if res["label"] == "nsfw"][0]
+
+ if nsfw_score < nsfw_threshold:
+ exif_data = Image.Exif()
+ exif_data[ExifTags.Base.Software] = "AI generated;txt2img;flux"
+ exif_data[ExifTags.Base.Make] = "Black Forest Labs"
+ exif_data[ExifTags.Base.Model] = name
+ if add_sampling_metadata:
+ exif_data[ExifTags.Base.ImageDescription] = prompt
+ img.save(fn, exif=exif_data, quality=95, subsampling=0)
+ idx += 1
+ else:
+ print("Your generated image may contain NSFW content.")
+
+ return idx
+
+
+@dataclass
+class ModelSpec:
+ params: FluxParams
+ ae_params: AutoEncoderParams
+ ckpt_path: str | None
+ lora_path: str | None
+ ae_path: str | None
+ repo_id: str | None
+ repo_flow: str | None
+ repo_ae: str | None
+
+
+configs = {
+ "flux-dev": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-dev",
+ repo_flow="flux1-dev.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_DEV"),
+ lora_path=None,
+ params=FluxParams(
+ in_channels=64,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=True,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+ "flux-dev-lora": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-dev",
+ repo_flow="flux1-dev.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_DEV"),
+ lora_path="your_lora_path",
+ params=FluxParams(
+ in_channels=64,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=True,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+ "flux-dev-fill-lora": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-Fill-dev",
+ repo_flow="flux1-fill-dev.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_DEV_FILL"),
+ lora_path="your_lora_path",
+ params=FluxParams(
+ in_channels=384,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=True,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+ "flux-schnell": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-schnell",
+ repo_flow="flux1-schnell.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_SCHNELL"),
+ lora_path=None,
+ params=FluxParams(
+ in_channels=64,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=False,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+ "flux-dev-canny": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-Canny-dev",
+ repo_flow="flux1-canny-dev.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_DEV_CANNY"),
+ lora_path=None,
+ params=FluxParams(
+ in_channels=128,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=True,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+ "flux-dev-canny-lora": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-dev",
+ repo_flow="flux1-dev.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_DEV"),
+ lora_path=os.getenv("FLUX_DEV_CANNY_LORA"),
+ params=FluxParams(
+ in_channels=128,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=True,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+ "flux-dev-depth": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-Depth-dev",
+ repo_flow="flux1-depth-dev.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_DEV_DEPTH"),
+ lora_path=None,
+ params=FluxParams(
+ in_channels=128,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=True,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+ "flux-dev-depth-lora": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-dev",
+ repo_flow="flux1-dev.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_DEV"),
+ lora_path=os.getenv("FLUX_DEV_DEPTH_LORA"),
+ params=FluxParams(
+ in_channels=128,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=True,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+ "flux-dev-fill": ModelSpec(
+ repo_id="black-forest-labs/FLUX.1-Fill-dev",
+ repo_flow="flux1-fill-dev.safetensors",
+ repo_ae="ae.safetensors",
+ ckpt_path=os.getenv("FLUX_DEV_FILL"),
+ lora_path=None,
+ params=FluxParams(
+ in_channels=384,
+ out_channels=64,
+ vec_in_dim=768,
+ context_in_dim=4096,
+ hidden_size=3072,
+ mlp_ratio=4.0,
+ num_heads=24,
+ depth=19,
+ depth_single_blocks=38,
+ axes_dim=[16, 56, 56],
+ theta=10_000,
+ qkv_bias=True,
+ guidance_embed=True,
+ ),
+ ae_path=os.getenv("AE"),
+ ae_params=AutoEncoderParams(
+ resolution=256,
+ in_channels=3,
+ ch=128,
+ out_ch=3,
+ ch_mult=[1, 2, 4, 4],
+ num_res_blocks=2,
+ z_channels=16,
+ scale_factor=0.3611,
+ shift_factor=0.1159,
+ ),
+ ),
+}
+
+
+def print_load_warning(missing: list[str], unexpected: list[str]) -> None:
+ if len(missing) > 0 and len(unexpected) > 0:
+ print(f"Got {len(missing)} missing keys:\n\t" + "\n\t".join(missing))
+ print("\n" + "-" * 79 + "\n")
+ print(f"Got {len(unexpected)} unexpected keys:\n\t" + "\n\t".join(unexpected))
+ elif len(missing) > 0:
+ print(f"Got {len(missing)} missing keys:\n\t" + "\n\t".join(missing))
+ elif len(unexpected) > 0:
+ print(f"Got {len(unexpected)} unexpected keys:\n\t" + "\n\t".join(unexpected))
+
+
+def load_flow_model(
+    name: str,
+    device: str | torch.device = "cuda",
+    hf_download: bool = True,
+    verbose: bool = True,
+    lora_rank: int = 128,
+    lora_scale: float = 1.0,
+) -> Flux:
+ # Loading Flux
+ print("Init model")
+ ckpt_path = configs[name].ckpt_path
+ lora_path = configs[name].lora_path
+ if (
+ ckpt_path is None
+ and configs[name].repo_id is not None
+ and configs[name].repo_flow is not None
+ and hf_download
+ ):
+ ckpt_path = hf_hub_download(configs[name].repo_id, configs[name].repo_flow)
+
+ print(f"ckpt_path: {ckpt_path}, lora_path: {lora_path}")
+ if lora_path is not None:
+        model = FluxLoraWrapper(
+            params=configs[name].params, lora_rank=lora_rank, lora_scale=lora_scale
+        ).to(torch.bfloat16)
+ else:
+ model = Flux(configs[name].params).to(torch.bfloat16)
+
+ if ckpt_path is not None:
+ print("Loading checkpoint")
+ # load_sft doesn't support torch.device
+ sd = load_sft(ckpt_path, device=str(device))
+ sd = optionally_expand_state_dict(model, sd)
+        missing, unexpected = model.load_state_dict(sd, strict=False, assign=True)
+        if verbose:
+            print_load_warning(missing, unexpected)
+
+ if configs[name].lora_path is not None and os.path.exists(configs[name].lora_path):
+ print("Loading LoRA")
+ lora_sd = load_sft(configs[name].lora_path, device=str(device))
+ # loading the lora params + overwriting scale values in the norms
+        missing, unexpected = model.load_state_dict(lora_sd, strict=False, assign=True)
+        if verbose:
+            print_load_warning(missing, unexpected)
+ return model
+
+
+def load_t5(device: str | torch.device = "cuda", max_length: int = 512) -> HFEmbedder:
+ # max length 64, 128, 256 and 512 should work (if your sequence is short enough)
+ return HFEmbedder("google/t5-v1_1-xxl", max_length=max_length, torch_dtype=torch.bfloat16).to(device)
+
+
+def load_clip(device: str | torch.device = "cuda") -> HFEmbedder:
+ return HFEmbedder("openai/clip-vit-large-patch14", max_length=77, torch_dtype=torch.bfloat16).to(device)
+
+
+def load_ae(name: str, device: str | torch.device = "cuda", hf_download: bool = True) -> AutoEncoder:
+ ckpt_path = configs[name].ae_path
+ if (
+ ckpt_path is None
+ and configs[name].repo_id is not None
+ and configs[name].repo_ae is not None
+ and hf_download
+ ):
+ ckpt_path = hf_hub_download(configs[name].repo_id, configs[name].repo_ae)
+
+ # Loading the autoencoder
+ print("Init AE")
+ with torch.device("meta" if ckpt_path is not None else device):
+ ae = AutoEncoder(configs[name].ae_params)
+
+ if ckpt_path is not None:
+ sd = load_sft(ckpt_path, device=str(device))
+ missing, unexpected = ae.load_state_dict(sd, strict=False, assign=True)
+ print_load_warning(missing, unexpected)
+ return ae
+
+
+def optionally_expand_state_dict(model: torch.nn.Module, state_dict: dict) -> dict:
+ """
+ Optionally expand the state dict to match the model's parameters shapes.
+ """
+ for name, param in model.named_parameters():
+ if name in state_dict:
+ if state_dict[name].shape != param.shape:
+ print(
+ f"Expanding '{name}' with shape {state_dict[name].shape} to model parameter with shape {param.shape}."
+ )
+ # expand with zeros:
+ expanded_state_dict_weight = torch.zeros_like(param, device=state_dict[name].device)
+ slices = tuple(slice(0, dim) for dim in state_dict[name].shape)
+ expanded_state_dict_weight[slices] = state_dict[name]
+ state_dict[name] = expanded_state_dict_weight
+
+ return state_dict
+
+
+class WatermarkEmbedder:
+ def __init__(self, watermark):
+ self.watermark = watermark
+ self.num_bits = len(WATERMARK_BITS)
+ self.encoder = WatermarkEncoder()
+ self.encoder.set_watermark("bits", self.watermark)
+
+ def __call__(self, image: torch.Tensor) -> torch.Tensor:
+ """
+ Adds a predefined watermark to the input image
+
+ Args:
+ image: ([N,] B, RGB, H, W) in range [-1, 1]
+
+ Returns:
+ same as input but watermarked
+ """
+ image = 0.5 * image + 0.5
+ squeeze = len(image.shape) == 4
+ if squeeze:
+ image = image[None, ...]
+ n = image.shape[0]
+ image_np = rearrange((255 * image).detach().cpu(), "n b c h w -> (n b) h w c").numpy()[:, :, :, ::-1]
+ # torch (b, c, h, w) in [0, 1] -> numpy (b, h, w, c) [0, 255]
+        # watermarking library expects input as cv2 BGR format
+ for k in range(image_np.shape[0]):
+ image_np[k] = self.encoder.encode(image_np[k], "dwtDct")
+ image = torch.from_numpy(rearrange(image_np[:, :, :, ::-1], "(n b) h w c -> n b c h w", n=n)).to(
+ image.device
+ )
+ image = torch.clamp(image / 255, min=0.0, max=1.0)
+ if squeeze:
+ image = image[0]
+ image = 2 * image - 1
+ return image
+
+
+# A fixed 48-bit message that was chosen at random
+WATERMARK_MESSAGE = 0b001010101111111010000111100111001111010100101110
+# bin(x)[2:] gives bits of x as str, use int to convert them to 0/1
+WATERMARK_BITS = [int(bit) for bit in bin(WATERMARK_MESSAGE)[2:]]
+embed_watermark = WatermarkEmbedder(WATERMARK_BITS)
diff --git a/imgproc.py b/imgproc.py
new file mode 100755
index 0000000000000000000000000000000000000000..5d9452c72ab2691a55e6ae3d1de73b9f4d940774
--- /dev/null
+++ b/imgproc.py
@@ -0,0 +1,96 @@
+import random
+
+from PIL import Image
+import PIL.Image
+import numpy as np
+
+
+def center_crop_arr(pil_image, image_size):
+ """
+ Center cropping implementation from ADM.
+ https://github.com/openai/guided-diffusion/blob/8fb3ad9197f16bbc40620447b2742e13458d2831/guided_diffusion/image_datasets.py#L126
+ """
+ while min(*pil_image.size) >= 2 * image_size:
+ pil_image = pil_image.resize(tuple(x // 2 for x in pil_image.size), resample=Image.BOX)
+
+ scale = image_size / min(*pil_image.size)
+ pil_image = pil_image.resize(tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC)
+
+ arr = np.array(pil_image)
+ crop_y = (arr.shape[0] - image_size) // 2
+ crop_x = (arr.shape[1] - image_size) // 2
+ return Image.fromarray(arr[crop_y : crop_y + image_size, crop_x : crop_x + image_size])
+
+
+def center_crop(pil_image, crop_size):
+ while pil_image.size[0] >= 2 * crop_size[0] and pil_image.size[1] >= 2 * crop_size[1]:
+ pil_image = pil_image.resize(tuple(x // 2 for x in pil_image.size), resample=Image.BOX)
+
+ scale = max(crop_size[0] / pil_image.size[0], crop_size[1] / pil_image.size[1])
+ pil_image = pil_image.resize(tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC)
+
+ crop_left = random.randint(0, pil_image.size[0] - crop_size[0])
+ crop_upper = random.randint(0, pil_image.size[1] - crop_size[1])
+ crop_right = crop_left + crop_size[0]
+ crop_lower = crop_upper + crop_size[1]
+ return pil_image.crop(box=(crop_left, crop_upper, crop_right, crop_lower))
+
+
+def pad(pil_image, pad_size):
+ while pil_image.size[0] >= 2 * pad_size[0] and pil_image.size[1] >= 2 * pad_size[1]:
+ pil_image = pil_image.resize(tuple(x // 2 for x in pil_image.size), resample=Image.BOX)
+
+ scale = min(pad_size[0] / pil_image.size[0], pad_size[1] / pil_image.size[1])
+ pil_image = pil_image.resize(tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC)
+
+ new_image = Image.new('RGB', pad_size, (255, 255, 255))
+ new_image.paste(pil_image, (0, 0))
+ return new_image
+
+
+def var_center_crop(pil_image, crop_size_list, random_top_k=4):
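+    # Rank the candidate crop sizes by how closely their aspect ratio matches
+    # the input image and randomly pick one of the top-k before cropping.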
+ w, h = pil_image.size
+ rem_percent = [min(cw / w, ch / h) / max(cw / w, ch / h) for cw, ch in crop_size_list]
+ crop_size = random.choice(
+ sorted(((x, y) for x, y in zip(rem_percent, crop_size_list)), reverse=True)[:random_top_k]
+ )[1]
+ return center_crop(pil_image, crop_size)
+
+
+def var_pad(pil_image, pad_size_list, random_top_k=4):
+ w, h = pil_image.size
+ rem_percent = [min(cw / w, ch / h) / max(cw / w, ch / h) for cw, ch in pad_size_list]
+ crop_size = random.choice(
+ sorted(((x, y) for x, y in zip(rem_percent, pad_size_list)), reverse=True)[:random_top_k]
+ )[1]
+ return pad(pil_image, crop_size)
+
+
+def match_size(w, h, crop_size_list, random_top_k=4):
+ rem_percent = [min(cw / w, ch / h) / max(cw / w, ch / h) for cw, ch in crop_size_list]
+ crop_size = random.choice(
+ sorted(((x, y) for x, y in zip(rem_percent, crop_size_list)), reverse=True)[:random_top_k]
+ )[1]
+ return crop_size
+
+
+def generate_crop_size_list(num_patches, patch_size, max_ratio=4.0, step_size=1):
+ assert max_ratio >= 1.0
+ crop_size_list = []
+ wp, hp = num_patches, step_size
+ while wp > 0:
+ if max(wp, hp) / min(wp, hp) <= max_ratio:
+ crop_size_list.append((wp * patch_size, hp * patch_size))
+ if (hp + step_size) * wp <= num_patches:
+ hp += step_size
+ else:
+ wp -= step_size
+ return crop_size_list
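+
+# Illustrative walk-through (not part of the API): generate_crop_size_list(4, 16)
+# yields [(64, 16), (48, 16), (32, 16), (32, 32), (16, 32), (16, 48), (16, 64)],
+# i.e. every patch grid whose area stays within num_patches=4 patches and whose
+# aspect ratio stays within max_ratio=4.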
+
+def to_rgb_if_rgba(img: Image.Image):
+ if img.mode.upper() == "RGBA":
+ rgb_img = Image.new("RGB", img.size, (255, 255, 255))
+ rgb_img.paste(img, mask=img.split()[3]) # 3 is the alpha channel
+ return rgb_img
+ else:
+ return img
diff --git a/transport/__init__.py b/transport/__init__.py
new file mode 100755
index 0000000000000000000000000000000000000000..de587ac28438caf863f0ea125ae724bc716ad5de
--- /dev/null
+++ b/transport/__init__.py
@@ -0,0 +1,62 @@
+from .transport import ModelType, PathType, Sampler, Transport, WeightType
+
+
+def create_transport(
+ path_type="Linear",
+ prediction="velocity",
+ loss_weight=None,
+ train_eps=None,
+ sample_eps=None,
+ snr_type="uniform",
+ loss_type="mse",
+ do_shift=True,
+):
+ """function for creating Transport object
+ **Note**: model prediction defaults to velocity
+ Args:
+ - path_type: type of path to use; default to linear
+ - learn_score: set model prediction to score
+ - learn_noise: set model prediction to noise
+ - velocity_weighted: weight loss by velocity weight
+ - likelihood_weighted: weight loss by likelihood weight
+ - train_eps: small epsilon for avoiding instability during training
+ - sample_eps: small epsilon for avoiding instability during sampling
+ """
+
+ if prediction == "noise":
+ model_type = ModelType.NOISE
+ elif prediction == "score":
+ model_type = ModelType.SCORE
+ else:
+ model_type = ModelType.VELOCITY
+
+ path_choice = {
+ "Linear": PathType.LINEAR,
+ "GVP": PathType.GVP,
+ "VP": PathType.VP,
+ }
+
+ path_type = path_choice[path_type]
+
+ if path_type in [PathType.VP]:
+ train_eps = 1e-5 if train_eps is None else train_eps
+ sample_eps = 1e-3 if sample_eps is None else sample_eps
+ elif path_type in [PathType.GVP, PathType.LINEAR] and model_type != ModelType.VELOCITY:
+ train_eps = 1e-3 if train_eps is None else train_eps
+ sample_eps = 1e-3 if sample_eps is None else sample_eps
+ else: # velocity & [GVP, LINEAR] is stable everywhere
+ train_eps = 0
+ sample_eps = 0
+
+ # create flow state
+ state = Transport(
+ model_type=model_type,
+ path_type=path_type,
+ train_eps=train_eps,
+ sample_eps=sample_eps,
+ snr_type=snr_type,
+ loss_type=loss_type,
+ do_shift=do_shift,
+ )
+
+ return state
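+
+
+# Example usage (illustrative sketch; `model` and `x1` are assumed to exist):
+#     transport = create_transport(path_type="Linear", prediction="velocity")
+#     terms = transport.training_losses(model, x1, model_kwargs={})
+#     loss = terms["loss"].mean()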
diff --git a/transport/integrators.py b/transport/integrators.py
new file mode 100755
index 0000000000000000000000000000000000000000..9b81f6bf0fc3a604ac9fcb6afaf8ff2c49863ed8
--- /dev/null
+++ b/transport/integrators.py
@@ -0,0 +1,120 @@
+import torch as th
+from torchdiffeq import odeint
+from .utils import time_shift, get_lin_function
+
+class sde:
+ """SDE solver class"""
+
+ def __init__(
+ self,
+ drift,
+ diffusion,
+ *,
+ t0,
+ t1,
+ num_steps,
+ sampler_type,
+ ):
+ assert t0 < t1, "SDE sampler has to be in forward time"
+
+ self.num_timesteps = num_steps
+ self.t = th.linspace(t0, t1, num_steps)
+ self.dt = self.t[1] - self.t[0]
+ self.drift = drift
+ self.diffusion = diffusion
+ self.sampler_type = sampler_type
+
+ def __Euler_Maruyama_step(self, x, mean_x, t, model, **model_kwargs):
+ w_cur = th.randn(x.size()).to(x)
+ t = th.ones(x.size(0)).to(x) * t
+ dw = w_cur * th.sqrt(self.dt)
+ drift = self.drift(x, t, model, **model_kwargs)
+ diffusion = self.diffusion(x, t)
+ mean_x = x + drift * self.dt
+ x = mean_x + th.sqrt(2 * diffusion) * dw
+ return x, mean_x
+
+ def __Heun_step(self, x, _, t, model, **model_kwargs):
+ w_cur = th.randn(x.size()).to(x)
+ dw = w_cur * th.sqrt(self.dt)
+ t_cur = th.ones(x.size(0)).to(x) * t
+ diffusion = self.diffusion(x, t_cur)
+ xhat = x + th.sqrt(2 * diffusion) * dw
+ K1 = self.drift(xhat, t_cur, model, **model_kwargs)
+ xp = xhat + self.dt * K1
+ K2 = self.drift(xp, t_cur + self.dt, model, **model_kwargs)
+ return (
+ xhat + 0.5 * self.dt * (K1 + K2),
+ xhat,
+ ) # at last time point we do not perform the heun step
+
+ def __forward_fn(self):
+ """TODO: generalize here by adding all private functions ending with steps to it"""
+ sampler_dict = {
+ "Euler": self.__Euler_Maruyama_step,
+ "Heun": self.__Heun_step,
+ }
+
+        try:
+            sampler = sampler_dict[self.sampler_type]
+        except KeyError:
+            raise NotImplementedError(f"Sampler type {self.sampler_type} not implemented.")
+
+ return sampler
+
+ def sample(self, init, model, **model_kwargs):
+ """forward loop of sde"""
+ x = init
+ mean_x = init
+ samples = []
+ sampler = self.__forward_fn()
+ for ti in self.t[:-1]:
+ with th.no_grad():
+ x, mean_x = sampler(x, mean_x, ti, model, **model_kwargs)
+ samples.append(x)
+
+ return samples
+
+
+class ode:
+ """ODE solver class"""
+
+ def __init__(
+ self,
+ drift,
+ *,
+ t0,
+ t1,
+ sampler_type,
+ num_steps,
+ atol,
+ rtol,
+ do_shift=True,
+ time_shifting_factor=None,
+ ):
+ assert t0 < t1, "ODE sampler has to be in forward time"
+
+ self.drift = drift
+ self.do_shift = do_shift
+ self.t = th.linspace(t0, t1, num_steps)
+ if time_shifting_factor:
+ self.t = self.t / (self.t + time_shifting_factor - time_shifting_factor * self.t)
+ self.atol = atol
+ self.rtol = rtol
+ self.sampler_type = sampler_type
+
+ def sample(self, x, model, model_kwargs):
+ device = x[0].device if isinstance(x, tuple) else x.device
+ def _fn(t, x):
+ t = th.ones(x[0].size(0)).to(device) * t if isinstance(x, tuple) else th.ones(x.size(0)).to(device) * t
+ model_output = self.drift(x, t, model, **model_kwargs)
+ return model_output
+
+ t = self.t.to(device)
+ if self.do_shift:
+ mu = get_lin_function(y1=0.5, y2=1.15)(x.shape[1])
+ t = time_shift(mu, 1.0, t)
+ atol = [self.atol] * len(x) if isinstance(x, tuple) else [self.atol]
+ rtol = [self.rtol] * len(x) if isinstance(x, tuple) else [self.rtol]
+ samples = odeint(_fn, x, t, method=self.sampler_type, atol=atol, rtol=rtol)
+ return samples
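+
+
+# Minimal usage sketch (illustrative; `drift_fn(x, t, model, **kw)` must return dx/dt):
+#     solver = ode(drift=drift_fn, t0=0.0, t1=1.0, sampler_type="euler",
+#                  num_steps=30, atol=1e-6, rtol=1e-3, do_shift=False)
+#     xs = solver.sample(x_init, model, model_kwargs={})  # xs[-1] is the final state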
diff --git a/transport/path.py b/transport/path.py
new file mode 100755
index 0000000000000000000000000000000000000000..3a5b1ea132c03dd324ada858053d23aa76de9be6
--- /dev/null
+++ b/transport/path.py
@@ -0,0 +1,201 @@
+import numpy as np
+import torch as th
+
+
+def expand_t_like_x(t, x):
+ """Function to reshape time t to broadcastable dimension of x
+ Args:
+ t: [batch_dim,], time vector
+ x: [batch_dim,...], data point
+ """
+ dims = [1] * len(x[0].size())
+ t = t.view(t.size(0), *dims)
+ return t
+
+
+#################### Coupling Plans ####################
+
+
+class ICPlan:
+ """Linear Coupling Plan"""
+
+ def __init__(self, sigma=0.0):
+ self.sigma = sigma
+
+ def compute_alpha_t(self, t):
+ """Compute the data coefficient along the path"""
+ return t, 1
+
+ def compute_sigma_t(self, t):
+ """Compute the noise coefficient along the path"""
+ return 1 - t, -1
+
+ def compute_d_alpha_alpha_ratio_t(self, t):
+ """Compute the ratio between d_alpha and alpha"""
+ return 1 / t
+
+ def compute_drift(self, x, t):
+ """We always output sde according to score parametrization;"""
+ t = expand_t_like_x(t, x)
+ alpha_ratio = self.compute_d_alpha_alpha_ratio_t(t)
+ sigma_t, d_sigma_t = self.compute_sigma_t(t)
+ drift = alpha_ratio * x
+ diffusion = alpha_ratio * (sigma_t**2) - sigma_t * d_sigma_t
+
+ return -drift, diffusion
+
+ def compute_diffusion(self, x, t, form="constant", norm=1.0):
+ """Compute the diffusion term of the SDE
+ Args:
+ x: [batch_dim, ...], data point
+ t: [batch_dim,], time vector
+ form: str, form of the diffusion term
+ norm: float, norm of the diffusion term
+ """
+ t = expand_t_like_x(t, x)
+ choices = {
+ "constant": norm,
+ "SBDM": norm * self.compute_drift(x, t)[1],
+ "sigma": norm * self.compute_sigma_t(t)[0],
+ "linear": norm * (1 - t),
+ "decreasing": 0.25 * (norm * th.cos(np.pi * t) + 1) ** 2,
+ "inccreasing-decreasing": norm * th.sin(np.pi * t) ** 2,
+ }
+
+ try:
+ diffusion = choices[form]
+ except KeyError:
+ raise NotImplementedError(f"Diffusion form {form} not implemented")
+
+ return diffusion
+
+ def get_score_from_velocity(self, velocity, x, t):
+ """Wrapper function: transfrom velocity prediction model to score
+ Args:
+ velocity: [batch_dim, ...] shaped tensor; velocity model output
+ x: [batch_dim, ...] shaped tensor; x_t data point
+ t: [batch_dim,] time tensor
+ """
+ t = expand_t_like_x(t, x)
+ alpha_t, d_alpha_t = self.compute_alpha_t(t)
+ sigma_t, d_sigma_t = self.compute_sigma_t(t)
+ mean = x
+ reverse_alpha_ratio = alpha_t / d_alpha_t
+ var = sigma_t**2 - reverse_alpha_ratio * d_sigma_t * sigma_t
+ score = (reverse_alpha_ratio * velocity - mean) / var
+ return score
+
+ def get_noise_from_velocity(self, velocity, x, t):
+ """Wrapper function: transfrom velocity prediction model to denoiser
+ Args:
+ velocity: [batch_dim, ...] shaped tensor; velocity model output
+ x: [batch_dim, ...] shaped tensor; x_t data point
+ t: [batch_dim,] time tensor
+ """
+ t = expand_t_like_x(t, x)
+ alpha_t, d_alpha_t = self.compute_alpha_t(t)
+ sigma_t, d_sigma_t = self.compute_sigma_t(t)
+ mean = x
+ reverse_alpha_ratio = alpha_t / d_alpha_t
+ var = reverse_alpha_ratio * d_sigma_t - sigma_t
+ noise = (reverse_alpha_ratio * velocity - mean) / var
+ return noise
+
+ def get_velocity_from_score(self, score, x, t):
+ """Wrapper function: transfrom score prediction model to velocity
+ Args:
+ score: [batch_dim, ...] shaped tensor; score model output
+ x: [batch_dim, ...] shaped tensor; x_t data point
+ t: [batch_dim,] time tensor
+ """
+ t = expand_t_like_x(t, x)
+ drift, var = self.compute_drift(x, t)
+ velocity = var * score - drift
+ return velocity
+
+ def compute_mu_t(self, t, x0, x1):
+ """Compute the mean of time-dependent density p_t"""
+ t = expand_t_like_x(t, x1)
+ alpha_t, _ = self.compute_alpha_t(t)
+ sigma_t, _ = self.compute_sigma_t(t)
+ if isinstance(x1, (list, tuple)):
+ return [alpha_t[i] * x1[i] + sigma_t[i] * x0[i] for i in range(len(x1))]
+ else:
+ return alpha_t * x1 + sigma_t * x0
+
+ def compute_xt(self, t, x0, x1):
+ """Sample xt from time-dependent density p_t; rng is required"""
+ xt = self.compute_mu_t(t, x0, x1)
+ return xt
+
+ def compute_ut(self, t, x0, x1, xt):
+ """Compute the vector field corresponding to p_t"""
+ t = expand_t_like_x(t, x1)
+ _, d_alpha_t = self.compute_alpha_t(t)
+ _, d_sigma_t = self.compute_sigma_t(t)
+ if isinstance(x1, (list, tuple)):
+ return [d_alpha_t * x1[i] + d_sigma_t * x0[i] for i in range(len(x1))]
+ else:
+ return d_alpha_t * x1 + d_sigma_t * x0
+
+ def plan(self, t, x0, x1):
+ xt = self.compute_xt(t, x0, x1)
+ ut = self.compute_ut(t, x0, x1, xt)
+ return t, xt, ut
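+
+    # For this linear plan, alpha_t = t and sigma_t = 1 - t, so plan() returns
+    #     x_t = t * x1 + (1 - t) * x0   and   u_t = x1 - x0,
+    # the straight-line interpolant and its constant velocity field.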
+
+
+class VPCPlan(ICPlan):
+ """class for VP path flow matching"""
+
+ def __init__(self, sigma_min=0.1, sigma_max=20.0):
+ self.sigma_min = sigma_min
+ self.sigma_max = sigma_max
+ self.log_mean_coeff = (
+ lambda t: -0.25 * ((1 - t) ** 2) * (self.sigma_max - self.sigma_min) - 0.5 * (1 - t) * self.sigma_min
+ )
+ self.d_log_mean_coeff = lambda t: 0.5 * (1 - t) * (self.sigma_max - self.sigma_min) + 0.5 * self.sigma_min
+
+ def compute_alpha_t(self, t):
+ """Compute coefficient of x1"""
+ alpha_t = self.log_mean_coeff(t)
+ alpha_t = th.exp(alpha_t)
+ d_alpha_t = alpha_t * self.d_log_mean_coeff(t)
+ return alpha_t, d_alpha_t
+
+ def compute_sigma_t(self, t):
+ """Compute coefficient of x0"""
+ p_sigma_t = 2 * self.log_mean_coeff(t)
+ sigma_t = th.sqrt(1 - th.exp(p_sigma_t))
+ d_sigma_t = th.exp(p_sigma_t) * (2 * self.d_log_mean_coeff(t)) / (-2 * sigma_t)
+ return sigma_t, d_sigma_t
+
+ def compute_d_alpha_alpha_ratio_t(self, t):
+ """Special purposed function for computing numerical stabled d_alpha_t / alpha_t"""
+ return self.d_log_mean_coeff(t)
+
+ def compute_drift(self, x, t):
+ """Compute the drift term of the SDE"""
+ t = expand_t_like_x(t, x)
+ beta_t = self.sigma_min + (1 - t) * (self.sigma_max - self.sigma_min)
+ return -0.5 * beta_t * x, beta_t / 2
+
+
+class GVPCPlan(ICPlan):
+ def __init__(self, sigma=0.0):
+ super().__init__(sigma)
+
+ def compute_alpha_t(self, t):
+ """Compute coefficient of x1"""
+ alpha_t = th.sin(t * np.pi / 2)
+ d_alpha_t = np.pi / 2 * th.cos(t * np.pi / 2)
+ return alpha_t, d_alpha_t
+
+ def compute_sigma_t(self, t):
+ """Compute coefficient of x0"""
+ sigma_t = th.cos(t * np.pi / 2)
+ d_sigma_t = -np.pi / 2 * th.sin(t * np.pi / 2)
+ return sigma_t, d_sigma_t
+
+ def compute_d_alpha_alpha_ratio_t(self, t):
+ """Special purposed function for computing numerical stabled d_alpha_t / alpha_t"""
+ return np.pi / (2 * th.tan(t * np.pi / 2))
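+
+
+# Sanity check for GVPCPlan: alpha_t = sin(pi*t/2) and sigma_t = cos(pi*t/2)
+# satisfy alpha_t**2 + sigma_t**2 = 1 for all t, i.e. the path is variance-preserving.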
diff --git a/transport/transport.py b/transport/transport.py
new file mode 100755
index 0000000000000000000000000000000000000000..d4e7dce80850f0547fcf52d7d41be65171328a25
--- /dev/null
+++ b/transport/transport.py
@@ -0,0 +1,469 @@
+import enum
+import math
+from typing import Callable
+
+import numpy as np
+import torch as th
+
+from . import path
+from .integrators import ode, sde
+from .utils import mean_flat, time_shift, get_lin_function
+
+
+class ModelType(enum.Enum):
+ """
+ Which type of output the model predicts.
+ """
+
+ NOISE = enum.auto() # the model predicts epsilon
+ SCORE = enum.auto() # the model predicts \nabla \log p(x)
+ VELOCITY = enum.auto() # the model predicts v(x)
+
+
+class PathType(enum.Enum):
+ """
+ Which type of path to use.
+ """
+
+ LINEAR = enum.auto()
+ GVP = enum.auto()
+ VP = enum.auto()
+
+
+class WeightType(enum.Enum):
+ """
+ Which type of weighting to use.
+ """
+
+ NONE = enum.auto()
+ VELOCITY = enum.auto()
+ LIKELIHOOD = enum.auto()
+
+
+class Transport:
+ def __init__(self, *, model_type, path_type, loss_type, train_eps, sample_eps, snr_type, do_shift):
+ path_options = {
+ PathType.LINEAR: path.ICPlan,
+ PathType.GVP: path.GVPCPlan,
+ PathType.VP: path.VPCPlan,
+ }
+
+ self.loss_type = loss_type
+ self.model_type = model_type
+ self.path_sampler = path_options[path_type]()
+ self.train_eps = train_eps
+ self.sample_eps = sample_eps
+
+ self.snr_type = snr_type
+ self.do_shift = do_shift
+
+ def prior_logp(self, z):
+ """
+ Standard multivariate normal prior
+ Assume z is batched
+ """
+ shape = th.tensor(z.size())
+ N = th.prod(shape[1:])
+ _fn = lambda x: -N / 2.0 * np.log(2 * np.pi) - th.sum(x**2) / 2.0
+ return th.vmap(_fn)(z)
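+
+    # prior_logp evaluates log N(z; 0, I) = -(N / 2) * log(2 * pi) - ||z||^2 / 2
+    # per batch element, where N is the number of non-batch elements of z.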
+
+ def check_interval(
+ self,
+ train_eps,
+ sample_eps,
+ *,
+ diffusion_form="SBDM",
+ sde=False,
+ reverse=False,
+ eval=False,
+ last_step_size=0.0,
+ ):
+ t0 = 0
+ t1 = 1
+ eps = train_eps if not eval else sample_eps
+ if type(self.path_sampler) in [path.VPCPlan]:
+ t1 = 1 - eps if (not sde or last_step_size == 0) else 1 - last_step_size
+
+ elif (type(self.path_sampler) in [path.ICPlan, path.GVPCPlan]) and (
+ self.model_type != ModelType.VELOCITY or sde
+ ): # avoid numerical issue by taking a first semi-implicit step
+ t0 = eps if (diffusion_form == "SBDM" and sde) or self.model_type != ModelType.VELOCITY else 0
+ t1 = 1 - eps if (not sde or last_step_size == 0) else 1 - last_step_size
+
+ if reverse:
+ t0, t1 = 1 - t0, 1 - t1
+
+ return t0, t1
+
+ def sample(self, x1, snr_type=None):
+ """Sampling x0 & t based on shape of x1 (if needed)
+ Args:
+ x1 - data point; [batch, *dim]
+ """
+ if isinstance(x1, (list, tuple)):
+ x0 = [th.randn_like(img_start) for img_start in x1]
+ else:
+ x0 = th.randn_like(x1)
+ t0, t1 = self.check_interval(self.train_eps, self.sample_eps)
+
+ if snr_type is None:
+ snr_type = self.snr_type
+
+ if snr_type.startswith("uniform"):
+ if "_" in snr_type:
+ _, t0, t1 = snr_type.split("_")
+ t0, t1 = float(t0), float(t1)
+ t = th.rand((len(x1),)) * (t1 - t0) + t0
+ elif snr_type == "lognorm":
+ u = th.normal(mean=0.0, std=1.0, size=(len(x1),))
+ t = 1 / (1 + th.exp(-u)) * (t1 - t0) + t0
+ else:
+ raise NotImplementedError("Not implemented snr_type %s" % snr_type)
+
+ if self.do_shift:
+ base_shift: float = 0.5
+ max_shift: float = 1.15
+ mu = get_lin_function(y1=base_shift, y2=max_shift)(x1.shape[1])
+ t = time_shift(mu, 1.0, t)
+
+ t = t.to(x1[0])
+ return t, x0, x1
+
+ def training_losses(self, model, x1, model_kwargs=None, extra_kwargs=None):
+ """Loss for training the score model
+ Args:
+ - model: backbone model; could be score, noise, or velocity
+ - x1: datapoint
+ - model_kwargs: additional arguments for the model
+ """
+        if model_kwargs is None:
+            model_kwargs = {}
+        if extra_kwargs is None:
+            extra_kwargs = {}
+ t, x0, x1 = self.sample(x1)
+ t, xt, ut = self.path_sampler.plan(t, x0, x1)
+ B = len(x0)
+
+ if "cond" in extra_kwargs and extra_kwargs["cond"] is not None:
+ out = model(th.cat((xt, extra_kwargs["cond"]), dim=-1), timesteps=1 - t, **model_kwargs)
+ else:
+ out = model(xt, timesteps=1 - t, **model_kwargs)
+ model_output = -out
+
+ terms = {}
+ if self.model_type == ModelType.VELOCITY:
+ if isinstance(x1, (list, tuple)):
+ assert len(model_output) == len(ut) == len(x1)
+ for i in range(B):
+ assert (
+ model_output[i].shape == ut[i].shape == x1[i].shape
+ ), f"{model_output[i].shape} {ut[i].shape} {x1[i].shape}"
+ terms["task_loss"] = th.stack(
+ [((ut[i] - model_output[i]) ** 2).mean() for i in range(B)],
+ dim=0,
+ )
+ else:
+ if "img_mask" in model_kwargs:
+ # print("loss", model_output.shape, model_kwargs["img_mask"].shape, model_kwargs["img_mask"].sum(dim=1), model_kwargs["img_mask"].sum())
+ B, L, D = model_output.shape
+ img_mask = model_kwargs["img_mask"]
+ mask_loss = (model_output - ut) * img_mask.unsqueeze(-1) # [B, L, D]
+ terms["task_loss"] = (mask_loss ** 2).sum(dim=list(range(1, ut.dim()))) / (img_mask.sum(dim=1) * D)
+ else:
+ terms["task_loss"] = mean_flat(((model_output - ut) ** 2))
+ terms["loss"] = terms["task_loss"]
+ terms["task_loss"] = terms["task_loss"].clone().detach()
+ terms["t"] = t
+
+ return terms
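+
+    # Note on the masked branch above: the squared error is zeroed outside
+    # img_mask and normalised by (#unmasked tokens * D), so padding tokens do
+    # not dilute the per-sample loss.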
+
+ def get_drift(self):
+ """member function for obtaining the drift of the probability flow ODE"""
+
+ def score_ode(x, t, model, **model_kwargs):
+ drift_mean, drift_var = self.path_sampler.compute_drift(x, t)
+ model_output = model(x, t, **model_kwargs)
+ return -drift_mean + drift_var * model_output # by change of variable
+
+ def noise_ode(x, t, model, **model_kwargs):
+ drift_mean, drift_var = self.path_sampler.compute_drift(x, t)
+ sigma_t, _ = self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))
+ model_output = model(x, t, **model_kwargs)
+ score = model_output / -sigma_t
+ return -drift_mean + drift_var * score
+
+ def velocity_ode(x, t, model, **model_kwargs):
+ if "cond" in model_kwargs and model_kwargs["cond"] is not None:
+ x = th.cat((x, model_kwargs["cond"]), dim=-1)
+ model_kwargs.pop("cond")
+ model_output = model(x, timesteps=t, **model_kwargs)
+ return model_output
+
+ if self.model_type == ModelType.NOISE:
+ drift_fn = noise_ode
+ elif self.model_type == ModelType.SCORE:
+ drift_fn = score_ode
+ else:
+ drift_fn = velocity_ode
+
+ def body_fn(x, t, model, **model_kwargs):
+ model_output = drift_fn(x, t, model, **model_kwargs)
+ assert model_output.shape == x.shape, "Output shape from ODE solver must match input shape"
+ return model_output
+
+ return body_fn
+
+ def get_score(
+ self,
+ ):
+ """member function for obtaining score of
+ x_t = alpha_t * x + sigma_t * eps"""
+ if self.model_type == ModelType.NOISE:
+ score_fn = (
+ lambda x, t, model, **kwargs: model(x, t, **kwargs)
+ / -self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))[0]
+ )
+ elif self.model_type == ModelType.SCORE:
+            score_fn = lambda x, t, model, **kwargs: model(x, t, **kwargs)
+ elif self.model_type == ModelType.VELOCITY:
+ score_fn = lambda x, t, model, **kwargs: self.path_sampler.get_score_from_velocity(
+ model(x, t, **kwargs), x, t
+ )
+ else:
+ raise NotImplementedError()
+
+ return score_fn
+
+
+class Sampler:
+ """Sampler class for the transport model"""
+
+ def __init__(
+ self,
+ transport,
+ ):
+ """Constructor for a general sampler; supporting different sampling methods
+ Args:
+ - transport: an tranport object specify model prediction & interpolant type
+ """
+
+ self.transport = transport
+ self.drift = self.transport.get_drift()
+ self.score = self.transport.get_score()
+
+ def __get_sde_diffusion_and_drift(
+ self,
+ *,
+ diffusion_form="SBDM",
+ diffusion_norm=1.0,
+ ):
+ def diffusion_fn(x, t):
+ diffusion = self.transport.path_sampler.compute_diffusion(x, t, form=diffusion_form, norm=diffusion_norm)
+ return diffusion
+
+ sde_drift = lambda x, t, model, **kwargs: self.drift(x, t, model, **kwargs) + diffusion_fn(x, t) * self.score(
+ x, t, model, **kwargs
+ )
+
+ sde_diffusion = diffusion_fn
+
+ return sde_drift, sde_diffusion
+
+ def __get_last_step(
+ self,
+ sde_drift,
+ *,
+ last_step,
+ last_step_size,
+ ):
+ """Get the last step function of the SDE solver"""
+
+ if last_step is None:
+ last_step_fn = lambda x, t, model, **model_kwargs: x
+ elif last_step == "Mean":
+ last_step_fn = (
+ lambda x, t, model, **model_kwargs: x + sde_drift(x, t, model, **model_kwargs) * last_step_size
+ )
+ elif last_step == "Tweedie":
+ alpha = self.transport.path_sampler.compute_alpha_t # simple aliasing; the original name was too long
+ sigma = self.transport.path_sampler.compute_sigma_t
+ last_step_fn = lambda x, t, model, **model_kwargs: x / alpha(t)[0][0] + (sigma(t)[0][0] ** 2) / alpha(t)[0][
+ 0
+ ] * self.score(x, t, model, **model_kwargs)
+ elif last_step == "Euler":
+ last_step_fn = (
+ lambda x, t, model, **model_kwargs: x + self.drift(x, t, model, **model_kwargs) * last_step_size
+ )
+ else:
+ raise NotImplementedError()
+
+ return last_step_fn
+
+ def sample_sde(
+ self,
+ *,
+ sampling_method="Euler",
+ diffusion_form="SBDM",
+ diffusion_norm=1.0,
+ last_step="Mean",
+ last_step_size=0.04,
+ num_steps=250,
+ ):
+ """returns a sampling function with given SDE settings
+ Args:
+ - sampling_method: type of sampler used in solving the SDE; default to be Euler-Maruyama
+ - diffusion_form: function form of diffusion coefficient; default to be matching SBDM
+ - diffusion_norm: function magnitude of diffusion coefficient; default to 1
+ - last_step: type of the last step; default to identity
+ - last_step_size: size of the last step; default to match the stride of 250 steps over [0,1]
+ - num_steps: total integration step of SDE
+ """
+
+ if last_step is None:
+ last_step_size = 0.0
+
+ sde_drift, sde_diffusion = self.__get_sde_diffusion_and_drift(
+ diffusion_form=diffusion_form,
+ diffusion_norm=diffusion_norm,
+ )
+
+ t0, t1 = self.transport.check_interval(
+ self.transport.train_eps,
+ self.transport.sample_eps,
+ diffusion_form=diffusion_form,
+ sde=True,
+ eval=True,
+ reverse=False,
+ last_step_size=last_step_size,
+ )
+
+ _sde = sde(
+ sde_drift,
+ sde_diffusion,
+ t0=t0,
+ t1=t1,
+ num_steps=num_steps,
+ sampler_type=sampling_method,
+ )
+
+ last_step_fn = self.__get_last_step(sde_drift, last_step=last_step, last_step_size=last_step_size)
+
+ def _sample(init, model, **model_kwargs):
+ xs = _sde.sample(init, model, **model_kwargs)
+ ts = th.ones(init.size(0), device=init.device) * t1
+ x = last_step_fn(xs[-1], ts, model, **model_kwargs)
+ xs.append(x)
+
+            assert len(xs) == num_steps, "Number of samples does not match the number of steps"
+
+ return xs
+
+ return _sample
+
+ def sample_ode(
+ self,
+ *,
+ sampling_method="dopri5",
+ num_steps=50,
+ atol=1e-6,
+ rtol=1e-3,
+ reverse=False,
+ do_shift=True,
+ time_shifting_factor=None,
+ strength=None
+ ):
+ """returns a sampling function with given ODE settings
+ Args:
+ - sampling_method: type of sampler used in solving the ODE; default to be Dopri5
+ - num_steps:
+ - fixed solver (Euler, Heun): the actual number of integration steps performed
+ - adaptive solver (Dopri5): the number of datapoints saved during integration; produced by interpolation
+ - atol: absolute error tolerance for the solver
+ - rtol: relative error tolerance for the solver
+ """
+
+        # for flux-style models, which are trained with reversed time: flip t and negate the drift
+        drift = lambda x, t, model, **kwargs: -self.drift(x, th.ones_like(t) * (1 - t), model, **kwargs)
+
+ t0, t1 = self.transport.check_interval(
+ self.transport.train_eps,
+ self.transport.sample_eps,
+ sde=False,
+ eval=True,
+ reverse=reverse,
+ last_step_size=0.0,
+ )
+
+ if strength is not None:
+ t0 = (t1 - t0) * strength + t0
+
+ _ode = ode(
+ drift=drift,
+ t0=t0,
+ t1=t1,
+ sampler_type=sampling_method,
+ num_steps=num_steps,
+ atol=atol,
+ rtol=rtol,
+ do_shift=do_shift,
+ time_shifting_factor=time_shifting_factor,
+ )
+
+ return _ode.sample
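+
+    # Example (illustrative): a 30-step Euler sampler, invoked the same way as
+    # elsewhere in this repo:
+    #     sample_fn = sampler.sample_ode(sampling_method="euler", num_steps=30,
+    #                                    atol=1e-6, rtol=1e-3, reverse=False, do_shift=False)
+    #     final = sample_fn(x_init, model.forward, model_kwargs)[-1]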
+
+ def sample_ode_likelihood(
+ self,
+ *,
+ sampling_method="dopri5",
+ num_steps=50,
+ atol=1e-6,
+ rtol=1e-3,
+ ):
+ """returns a sampling function for calculating likelihood with given ODE settings
+ Args:
+ - sampling_method: type of sampler used in solving the ODE; default to be Dopri5
+ - num_steps:
+ - fixed solver (Euler, Heun): the actual number of integration steps performed
+ - adaptive solver (Dopri5): the number of datapoints saved during integration; produced by interpolation
+ - atol: absolute error tolerance for the solver
+ - rtol: relative error tolerance for the solver
+ """
+
+ def _likelihood_drift(x, t, model, **model_kwargs):
+ x, _ = x
+ eps = th.randint(2, x.size(), dtype=th.float, device=x.device) * 2 - 1
+ t = th.ones_like(t) * (1 - t)
+ with th.enable_grad():
+ x.requires_grad = True
+ grad = th.autograd.grad(th.sum(self.drift(x, t, model, **model_kwargs) * eps), x)[0]
+ logp_grad = th.sum(grad * eps, dim=tuple(range(1, len(x.size()))))
+ drift = self.drift(x, t, model, **model_kwargs)
+ return (-drift, logp_grad)
+
+ t0, t1 = self.transport.check_interval(
+ self.transport.train_eps,
+ self.transport.sample_eps,
+ sde=False,
+ eval=True,
+ reverse=False,
+ last_step_size=0.0,
+ )
+
+ _ode = ode(
+ drift=_likelihood_drift,
+ t0=t0,
+ t1=t1,
+ sampler_type=sampling_method,
+ num_steps=num_steps,
+ atol=atol,
+ rtol=rtol,
+ )
+
+ def _sample_fn(x, model, **model_kwargs):
+ init_logp = th.zeros(x.size(0)).to(x)
+ input = (x, init_logp)
+ drift, delta_logp = _ode.sample(input, model, **model_kwargs)
+ drift, delta_logp = drift[-1], delta_logp[-1]
+ prior_logp = self.transport.prior_logp(drift)
+ logp = prior_logp - delta_logp
+ return logp, drift
+
+ return _sample_fn
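+
+    # The Rademacher vector `eps` above implements Hutchinson's trace estimator:
+    # E[eps^T J eps] = tr(J), an unbiased estimate of the divergence term in the
+    # instantaneous change-of-variables formula for the log-likelihood.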
diff --git a/transport/utils.py b/transport/utils.py
new file mode 100755
index 0000000000000000000000000000000000000000..b1f8a7447b37ce72dace2f584fd09331d042c55c
--- /dev/null
+++ b/transport/utils.py
@@ -0,0 +1,44 @@
+import torch as th
+import math
+
+class EasyDict:
+ def __init__(self, sub_dict):
+ for k, v in sub_dict.items():
+ setattr(self, k, v)
+
+ def __getitem__(self, key):
+ return getattr(self, key)
+
+
+def mean_flat(x):
+ """
+ Take the mean over all non-batch dimensions.
+ """
+ return th.mean(x, dim=list(range(1, len(x.size()))))
+
+
+def log_state(state):
+ result = []
+
+ sorted_state = dict(sorted(state.items()))
+ for key, value in sorted_state.items():
+ # Check if the value is an instance of a class
+ if " 1600 * 1600:
+ aspect_ratio = target_size[0] / target_size[1]
+ target_area = 1600 * 1600
+ new_h = int((target_area / aspect_ratio) ** 0.5)
+ new_w = int(new_h * aspect_ratio)
+ target_size = (new_w, new_h)
+
+ self.sample_fn = self.sampler.sample_ode(
+ sampling_method=self.solver,
+ num_steps=upsampling_steps,
+ atol=self.atol,
+ rtol=self.rtol,
+ reverse=False,
+ do_shift=False,
+ time_shifting_factor=1.0,
+ strength=upsampling_noise
+ )
+
+ image = image.resize(((target_size[0] // 16) * 16, (target_size[1] // 16) * 16))
+ processed_image = self.image_transform(image)
+ processed_image = processed_image.to(self.device, non_blocking=True)
+ blank = torch.zeros_like(processed_image, device=self.device, dtype=self.dtype)
+ mask = torch.full((1, 1, processed_image.shape[1], processed_image.shape[2]), fill_value=1, device=self.device, dtype=self.dtype)
+ with torch.no_grad():
+ latent = self.ae.encode(processed_image[None].to(self.ae.dtype)).latent_dist.sample()
+ blank = self.ae.encode(blank[None].to(self.ae.dtype)).latent_dist.sample()
+ latent = (latent - self.ae.config.shift_factor) * self.ae.config.scaling_factor
+ blank = (blank - self.ae.config.shift_factor) * self.ae.config.scaling_factor
+ latent_h, latent_w = latent.shape[2:]
+
+ mask = rearrange(mask, "b c (h ph) (w pw) -> b (c ph pw) h w", ph=8, pw=8)
+ mask = rearrange(mask, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+
+ latent = latent.to(self.dtype)
+ blank = blank.to(self.dtype)
+ latent = rearrange(latent, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+ blank = rearrange(blank, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+
+ img_cond = torch.cat((blank, mask), dim=-1)
+
+ # Generate noise
+ noise = torch.randn([1, 16, latent_h, latent_w], device=self.device, generator=generator).to(self.dtype)
+ x = [[noise]]
+
+ inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[content_prompt], proportion_empty_prompts=0.0)
+ inp["img"] = inp["img"] * (1 - upsampling_noise) + latent * upsampling_noise
+ model_kwargs = dict(
+ txt=inp["txt"],
+ txt_ids=inp["txt_ids"],
+ txt_mask=inp["txt_mask"],
+ y=inp["vec"],
+ img_ids=inp["img_ids"],
+ img_mask=inp["img_mask"],
+ cond=img_cond,
+ guidance=torch.full((1,), cfg, device=self.device, dtype=self.dtype),
+ )
+ sample = self.sample_fn(
+ inp["img"], self.model.forward, model_kwargs
+ )[-1]
+
+ sample = sample[:1]
+ sample = rearrange(sample, "b (h w) (c ph pw) -> b c (h ph) (w pw)", ph=2, pw=2, h=latent_h // 2, w=latent_w // 2)
+ sample = self.ae.decode(sample / self.ae.config.scaling_factor + self.ae.config.shift_factor)[0]
+ sample = (sample + 1.0) / 2.0
+ sample.clamp_(0.0, 1.0)
+ sample = sample[0]
+
+ output_image = to_pil_image(sample.float())
+
+ return output_image
+
+ def process_images(
+ self, images: list[list[Image.Image]], text_prompt: list[str],
+ seed: int = 0,
+ cfg: int = 30,
+ steps: int = 30,
+ upsampling_steps: int = 10,
+ upsampling_noise: float = 0.4,
+            is_upsampling: bool = True):
+ """
+ Processes a list of images based on provided text prompts and settings,
+ with optional upsampling steps to improve image resolution or detail.
+
+ Parameters:
+ images (list[list[Image.Image]]): A grid-layout image collection, each row represents an in-context example or the current query,
+ where the current query should be placed in the last row.
+                                           The target image may be None; all other images must be PIL images (Image.Image).
+ text_prompt (list[str]): Three prompts, representing the layout prompt, task prompt, and content prompt respectively.
+ seed (int): A fixed integer seed to ensure reproducibility of the random elements in the processing.
+ cfg (int): The strength of Classifier-Free Diffusion Guidance.
+ steps (int): The number of sampling steps.
+ upsampling_steps (int): The number of denoising steps when upsampling.
+            upsampling_noise (float): The SDEdit strength used when upsampling:
+                higher values keep more of the input image and add less noise,
+                and a value of 1 adds no noise at all.
+ is_upsampling (bool, optional): A flag to indicate if upsampling should be applied using SDEdit.
+
+ Returns:
+ Processed images as a result of the algorithm, with optional upsampling applied based on the `is_upsampling` flag.
+ """
+
+ if seed == 0:
+ seed = random.randint(0, 2 ** 32 - 1)
+
+ self.sample_fn = self.sampler.sample_ode(
+ sampling_method=self.solver,
+ num_steps=steps,
+ atol=self.atol,
+ rtol=self.rtol,
+ reverse=False,
+ do_shift=True,
+ time_shifting_factor=self.time_shifting_factor,
+ )
+
+ # Use class grid size
+ grid_h, grid_w = self.grid_h, self.grid_w
+
+ # Ensure all images are RGB mode or None
+ for i in range(0, grid_h):
+ images[i] = [img.convert("RGB") if img is not None else None for img in images[i]]
+
+ # Adjust all image sizes
+ resolution = self.resolution
+ processed_images = []
+ mask_position = []
+ target_size = None
+ upsampling_size = None
+
+ for i in range(grid_h):
+ # Find the size of the first non-empty image in this row
+ reference_size = None
+ for j in range(0, grid_w):
+ if images[i][j] is not None:
+ if i == grid_h - 1 and upsampling_size is None:
+ upsampling_size = images[i][j].size
+
+ resized = resize_with_aspect_ratio(images[i][j], resolution, aspect_ratio=None)
+ reference_size = resized.size
+ if i == grid_h - 1 and target_size is None:
+ target_size = reference_size
+ break
+
+ # Process all images in this row
+ for j in range(0, grid_w):
+ if images[i][j] is not None:
+ target = resize_with_aspect_ratio(images[i][j], resolution, aspect_ratio=None)
+ if target.width <= target.height:
+ target = target.resize((reference_size[0], int(reference_size[0] / target.width * target.height)))
+ target = center_crop(target, reference_size)
+ elif target.width > target.height:
+ target = target.resize((int(reference_size[1] / target.height * target.width), reference_size[1]))
+ target = center_crop(target, reference_size)
+
+ processed_images.append(target)
+ if i == grid_h - 1:
+ mask_position.append(0)
+ else:
+ # If this row has a reference size, use it; otherwise use default size
+ if reference_size:
+ blank = Image.new('RGB', reference_size, (0, 0, 0))
+ else:
+ blank = Image.new('RGB', (resolution, resolution), (0, 0, 0))
+ processed_images.append(blank)
+ if i == grid_h - 1:
+ mask_position.append(1)
+
+ if len(mask_position) > 1 and sum(mask_position) > 1:
+ if target_size is None:
+ new_w = 384
+ else:
+ new_w = target_size[0]
+ for i in range(len(processed_images)):
+ if processed_images[i] is not None:
+ new_h = int(processed_images[i].height * (new_w / processed_images[i].width))
+ new_w = int(new_w / 16) * 16
+ new_h = int(new_h / 16) * 16
+ processed_images[i] = processed_images[i].resize((new_w, new_h))
+
+ # Build grid image and mask
+ with torch.autocast("cuda", self.dtype):
+ grid_image = []
+ fill_mask = []
+ for i in range(grid_h):
+ row_images = [self.image_transform(img) for img in processed_images[i * grid_w: (i + 1) * grid_w]]
+ if i == grid_h - 1:
+ row_masks = [torch.full((1, 1, row_images[0].shape[1], row_images[0].shape[2]), fill_value=m, device=self.device) for m in mask_position]
+ else:
+                row_masks = [torch.full((1, 1, row_images[0].shape[1], row_images[0].shape[2]), fill_value=0, device=self.device) for _ in mask_position]
+
+ grid_image.append(torch.cat(row_images, dim=2).to(self.device, non_blocking=True))
+ fill_mask.append(torch.cat(row_masks, dim=3))
+ # Encode condition image
+ with torch.no_grad():
+ fill_cond = [self.ae.encode(img[None].to(self.ae.dtype)).latent_dist.sample()[0] for img in grid_image]
+ fill_cond = [(img - self.ae.config.shift_factor) * self.ae.config.scaling_factor for img in fill_cond]
+
+ # Rearrange mask
+ fill_mask = [rearrange(mask, "b c (h ph) (w pw) -> b (c ph pw) h w", ph=8, pw=8) for mask in fill_mask]
+ fill_mask = [rearrange(mask, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2) for mask in fill_mask]
+
+ fill_cond = [img.to(self.dtype) for img in fill_cond]
+ fill_cond = [rearrange(img.unsqueeze(0), "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2) for img in fill_cond]
+
+ fill_cond = torch.cat(fill_cond, dim=1)
+ fill_mask = torch.cat(fill_mask, dim=1)
+ img_cond = torch.cat((fill_cond, fill_mask), dim=-1)
+
+ # Generate sample
+ noise = []
+ sliced_subimage = []
+ rng = torch.Generator(device=self.device).manual_seed(int(seed))
+ for sub_img in grid_image:
+ h, w = sub_img.shape[-2:]
+ sliced_subimage.append((h, w))
+ latent_w, latent_h = w // 8, h // 8
+ noise.append(torch.randn([1, 16, latent_h, latent_w], device=self.device, generator=rng).to(self.dtype))
+ x = [noise]
+
+ with torch.no_grad():
+ inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(text_prompt)], proportion_empty_prompts=0.0)
+
+ model_kwargs = dict(
+ txt=inp["txt"],
+ txt_ids=inp["txt_ids"],
+ txt_mask=inp["txt_mask"],
+ y=inp["vec"],
+ img_ids=inp["img_ids"],
+ img_mask=inp["img_mask"],
+ cond=img_cond,
+ guidance=torch.full((1,), cfg, device=self.device, dtype=self.dtype),
+ )
+ samples = self.sample_fn(
+ inp["img"], self.model.forward, model_kwargs
+ )[-1]
+
+ # Get query row
+ samples = samples[:1]
+ row_samples = []
+ start = 0
+ for size in sliced_subimage:
+ end = start + (size[0] * size[1] // 256)
+ latent_h = size[0] // 8
+ latent_w = size[1] // 8
+ row_sample = samples[:, start:end, :]
+ row_sample = rearrange(row_sample, "b (h w) (c ph pw) -> b c (h ph) (w pw)", ph=2, pw=2, h=latent_h//2, w=latent_w//2)
+ row_sample = self.ae.decode(row_sample / self.ae.config.scaling_factor + self.ae.config.shift_factor)[0]
+ row_sample = (row_sample + 1.0) / 2.0
+ row_sample.clamp_(0.0, 1.0)
+ row_samples.append(row_sample[0])
+ start = end
+
+ # Convert all samples to PIL images
+ output_images = []
+ for row_sample in row_samples:
+ output_image = to_pil_image(row_sample.float())
+ output_images.append(output_image)
+
+ ret = []
+ ret_w = output_images[-1].width
+ ret_h = output_images[-1].height
+
+ row_start = (grid_h - 1) * grid_w
+ row_end = grid_h * grid_w
+ for i in range(row_start, row_end):
+            # output every position of the query row (the original mask-only
+            # check, `images[i] is None`, is intentionally disabled)
+            if True:
+ cropped = output_images[-1].crop(((i - row_start) * ret_w // self.grid_w, 0, ((i - row_start) + 1) * ret_w // self.grid_w, ret_h))
+ ret.append(cropped)
+ if mask_position[i - row_start] and is_upsampling:
+ upsampled = self.upsampling(
+ cropped,
+ upsampling_size,
+ cfg,
+ upsampling_steps=upsampling_steps,
+ upsampling_noise=upsampling_noise,
+ generator=rng,
+ content_prompt=text_prompt[2])
+ ret.append(upsampled)
+
+ return ret
\ No newline at end of file