Upload 64 files
This view is limited to 50 files because it contains too many changes.
- app.py +156 -0
- base/__init__.py +0 -0
- base/base_modules.py +301 -0
- base/base_segmentation.py +230 -0
- base/base_wandb_model.py +107 -0
- checkpoint/stage_i.pkl +3 -0
- checkpoint/stage_ii.pkl +3 -0
- ckpt/BiomedCLIP/biomed-vlp-eval.svg +1 -0
- ckpt/BiomedCLIP/biomed_clip_example.ipynb +0 -0
- ckpt/BiomedCLIP/config.json +17 -0
- ckpt/BiomedCLIP/open_clip_config.json +31 -0
- ckpt/BiomedCLIP/special_tokens_map.json +7 -0
- ckpt/BiomedCLIP/tokenizer.json +0 -0
- ckpt/BiomedCLIP/tokenizer_config.json +15 -0
- ckpt/BiomedCLIP/vocab.txt +0 -0
- configs/confocal.cfg +36 -0
- configs/confocal_marker.cfg +26 -0
- configs/convertion.cfg +25 -0
- configs/extend_1.cfg +36 -0
- configs/extend_2.cfg +26 -0
- configs/full.cfg +39 -0
- configs/imc.cfg +36 -0
- configs/translation.cfg +36 -0
- markers.py +136 -0
- models/modules/biomedclip.py +114 -0
- models/modules/dct.py +305 -0
- models/modules/networks.py +714 -0
- test_data/1.npz +3 -0
- test_data/10.npz +3 -0
- test_data/11.npz +3 -0
- test_data/12.npz +3 -0
- test_data/13.npz +3 -0
- test_data/14.npz +3 -0
- test_data/15.npz +3 -0
- test_data/16.npz +3 -0
- test_data/17.npz +3 -0
- test_data/18.npz +3 -0
- test_data/19.npz +3 -0
- test_data/2.npz +3 -0
- test_data/20.npz +3 -0
- test_data/21.npz +3 -0
- test_data/22.npz +3 -0
- test_data/23.npz +3 -0
- test_data/24.npz +3 -0
- test_data/25.npz +3 -0
- test_data/26.npz +3 -0
- test_data/27.npz +3 -0
- test_data/28.npz +3 -0
- test_data/29.npz +3 -0
- test_data/3.npz +3 -0
app.py
ADDED
@@ -0,0 +1,156 @@
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
import gradio as gr
import yaml
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
from models.modules.networks import PromptAttentionUNet, HighResEnhancer
from models.modules.biomedclip import BiomedCLIPTextEncoder
from monai.inferers import sliding_window_inference
from markers import breast_markers, prostatic_markers, pancreatic_markers

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load the convertion model
# load the cfg file for convertion
cfg_file = 'configs/{}.cfg'.format('convertion')
with open(cfg_file, 'r') as f:
    cfg = yaml.safe_load(f)
print("successfully loaded config file: ", cfg)

# convertion models
convertion_ckpt = './checkpoint/stage_ii.pkl'
convertion_net = PromptAttentionUNet(in_channels=cfg['MODEL']['IMC_IN'], out_channels=cfg['MODEL']['IMC_OUT'], channels=(128, 256, 512, 1024, 2048))
prompt_model = BiomedCLIPTextEncoder(device=device)

# load state_dict
state_dict = torch.load(convertion_ckpt, map_location='cpu')['generator']
# remove all the 'module.' prefix
state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
convertion_net.load_state_dict(state_dict)

# load the translation model
cfg_file = 'configs/{}.cfg'.format('translation')
with open(cfg_file, 'r') as f:
    cfg = yaml.safe_load(f)
print("successfully loaded config file: ", cfg)
translation_ckpt = './checkpoint/stage_i.pkl'

imc_net = HighResEnhancer(model_name=cfg['MODEL']['TIMM_MODEL'],
                          in_channels=cfg['MODEL']['IMC_IN'],
                          out_channels=cfg['MODEL']['IMC_OUT'],
                          norm=cfg['MODEL']['NORM'],
                          use_dilated_bottleneck=True)

# load state_dict for IMC
state_dict = torch.load(translation_ckpt, map_location='cpu')['imc_G']
# remove all the 'module.0.' prefix
state_dict = {k.replace('module.0.', ''): v for k, v in state_dict.items()}
# remove the key "sobel.filter" in the state_dict
state_dict.pop('sobel.filter.weight')
imc_net.load_state_dict(state_dict, strict=False)

convertion_net.eval().to(device)
imc_net.eval().to(device)

# load the metadata for demo data
df = pd.read_csv('./test_data/test_metadata.csv')
breast_df = df[df['source'] == 'BreastCancer_V2']
prostatic_df = df[df['source'] == 'ProstaticCancer_V2']
pancreatic_df = df[df['source'] == 'PancreaticCancer_V2']


def load_image(pair_index):
    # select the item from the dataframe and convert to Series using `squeeze()`
    item = df[df['name'] == pair_index].squeeze()
    data = np.load(item['path'])['arr_0']
    x1 = data[:, :, 0]
    x2 = data[:, :, 1]
    return gr.Image(value=x1), gr.Image(value=x2)


def generate_imc(x1, x2, marker_name):
    # stage I
    inputs = np.concatenate([x1, x2[:, :, 2:3]], axis=2)
    # normalize to [0, 1]
    inputs = inputs / 255.0
    # to tensor
    inputs = torch.from_numpy(inputs.transpose(2, 0, 1)).unsqueeze(0).float()
    # rescale to [-1, 1]
    inputs = 2 * inputs - 1
    output = sliding_window_inference(inputs.to(device), roi_size=(320, 320), sw_batch_size=2, predictor=imc_net, overlap=0.5)
    output = F.tanh(output)
    # to numpy
    pred_nuclei = output[0].detach().cpu().numpy().transpose(1, 2, 0)
    pred_nuclei = (pred_nuclei + 1) / 2  # normalize to [0, 1]
    # stage II
    nuclei_inputs = torch.from_numpy(pred_nuclei).permute(2, 0, 1).unsqueeze(0).float()
    # rescale to [-1, 1]
    nuclei_inputs = 2 * nuclei_inputs - 1
    prompt_in = torch.as_tensor(prompt_model([marker_name])).to(device)
    output = F.tanh(convertion_net(nuclei_inputs.to(device), prompt_in))
    marker = output[0].detach().cpu().numpy().transpose(1, 2, 0)
    marker = (marker + 1) / 2  # normalize to [0, 1]
    # visualization
    vis = np.concatenate([marker, np.zeros_like(pred_nuclei, dtype=np.float32), pred_nuclei], axis=2)
    # normalize to [0, 255] and convert to uint8
    vis = (vis * 255).astype(np.uint8)
    return gr.Image(value=vis)


# Function to update the second dropdown based on the first dropdown's selection
def update_dropdown_by_tissue(selected_category):
    if selected_category == "Breast":
        image_selector = gr.Dropdown(choices=breast_df['name'].values.tolist(), value=breast_df['name'].values[0], interactive=True)
        marker_selector = gr.Dropdown(choices=breast_markers, value=breast_markers[0], interactive=True)
    elif selected_category == "Pancreatic":
        image_selector = gr.Dropdown(choices=pancreatic_df['name'].values.tolist(), value=pancreatic_df['name'].values[0], interactive=True)
        marker_selector = gr.Dropdown(choices=pancreatic_markers, value=pancreatic_markers[0], interactive=True)
    elif selected_category == "Prostatic":
        image_selector = gr.Dropdown(choices=prostatic_df['name'].values.tolist(), value=prostatic_df['name'].values[0], interactive=True)
        marker_selector = gr.Dropdown(choices=prostatic_markers, value=prostatic_markers[0], interactive=True)
    return [image_selector, marker_selector]


# Create the Gradio interface
def create_gradio_ui():
    with gr.Blocks() as demo:
        with gr.Tab("Mbi2Spi"):
            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Row():
                        # image visualizer
                        brightfield = gr.Image(label="Brightfield Image", type="numpy", interactive=False)
                        aux = gr.Image(type="numpy", visible=False, interactive=False)

                    with gr.Row():
                        with gr.Column():
                            # tissue selector (Breast, Pancreatic, Prostatic)
                            tissue_selector = gr.Dropdown(choices=["Breast", "Pancreatic", "Prostatic"], label="Select Tissue Type")
                            # marker selector
                            marker_selector = gr.Dropdown(label="Marker Selector", interactive=False)

                        with gr.Column():
                            # image selector
                            image_selector = gr.Dropdown(label="Brightfield Selector", interactive=False)
                            # update the image selector based on the tissue type
                            tissue_selector.change(update_dropdown_by_tissue, inputs=tissue_selector, outputs=[image_selector, marker_selector])

                with gr.Column(scale=1):
                    output_image = gr.Image(label="Generated Image", type="numpy")
                    button1 = gr.Button("Predict IMC")

            # Load the selected image and update the input image and infrared image
            image_selector.change(load_image, inputs=image_selector, outputs=[brightfield, aux])

            # Event handler for button click
            button1.click(generate_imc, inputs=[brightfield, aux, marker_selector], outputs=output_image)

    return demo

# Launch the demo
if __name__ == '__main__':
    demo = create_gradio_ui()
    demo.launch(show_error=True)
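For reference, the same two-stage pipeline can be exercised without the Gradio UI. The sketch below is illustrative only: it assumes the model-loading code at the top of app.py has already run (so device, imc_net, convertion_net and prompt_model exist), it assumes the demo channels can be replicated to the H x W x 3 uint8 arrays that generate_imc receives from Gradio, and '1.npz' and 'CD45' are placeholder choices. Feeding the tanh output of stage I straight into stage II is equivalent to the [0, 1] round trip in generate_imc.

import numpy as np
import torch
import torch.nn.functional as F
from monai.inferers import sliding_window_inference

data = np.load('./test_data/1.npz')['arr_0']
x1 = np.repeat(data[:, :, 0:1], 3, axis=2)   # brightfield replicated to 3 channels (assumption)
x2 = np.repeat(data[:, :, 1:2], 3, axis=2)   # auxiliary image replicated to 3 channels (assumption)

# stage I: brightfield + one auxiliary channel -> virtual nuclei, as in generate_imc
inputs = np.concatenate([x1, x2[:, :, 2:3]], axis=2) / 255.0
inputs = 2 * torch.from_numpy(inputs.transpose(2, 0, 1)).unsqueeze(0).float() - 1
with torch.no_grad():
    nuclei = F.tanh(sliding_window_inference(inputs.to(device), roi_size=(320, 320),
                                             sw_batch_size=2, predictor=imc_net, overlap=0.5))
    # stage II: virtual nuclei + text prompt -> requested marker channel
    prompt_in = torch.as_tensor(prompt_model(['CD45'])).to(device)
    marker = F.tanh(convertion_net(nuclei, prompt_in))
print(marker.shape)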
base/__init__.py
ADDED
File without changes
base/base_modules.py
ADDED
@@ -0,0 +1,301 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


norm_dict = {'BATCH': nn.BatchNorm2d, 'INSTANCE': nn.InstanceNorm2d, 'GROUP': nn.GroupNorm}
NUM_GROUPS = 16
__all__ = ['ConvNorm', 'ConvBlock', 'ConvBottleNeck', 'ResBlock', 'ResBottleneck', 'PromptResBlock', 'PromptResBottleneck', 'PromptAttentionModule', 'norm_dict', 'SobelEdge']


class Identity(nn.Module):
    """
    Identity mapping for building a residual connection
    """
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x


class ConvNorm(nn.Module):
    """
    Convolution and normalization
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, leaky=True, norm='INSTANCE', activation=True):
        super().__init__()
        # determine basic attributes
        self.norm_type = norm
        padding = (kernel_size - 1) // 2

        # activation, support PReLU and common ReLU
        if activation:
            self.act = nn.LeakyReLU() if leaky else nn.ReLU(inplace=False)
            # self.act = nn.ELU() if leaky else nn.ReLU(inplace=True)
        else:
            self.act = None

        # instantiate layers
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)

        if norm in ['BATCH', 'INSTANCE']:
            norm_layer = norm_dict[norm]
            self.norm = norm_layer(out_channels)
        elif norm == 'GROUP':
            norm_layer = norm_dict[norm]
            self.norm = norm_layer(NUM_GROUPS, in_channels)
        elif norm == 'NONE':
            self.norm = nn.Identity()
        else:
            raise NotImplementedError(f'Normalization type {norm} not implemented')

    def basic_forward(self, x):
        x = self.conv(x)
        x = self.norm(x)
        if self.act:
            x = self.act(x)
        return x

    def group_forward(self, x):
        x = self.norm(x)
        if self.act:
            x = self.act(x)
        x = self.conv(x)
        return x

    def forward(self, x):
        if self.norm_type in ['BATCH', 'INSTANCE']:
            return self.basic_forward(x)
        else:
            return self.group_forward(x)


class PromptAttentionModule(nn.Module):
    def __init__(self, in_channels: int, prompt_channels: int, mid_channels: int) -> None:
        super().__init__()
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.conv_down = nn.Linear(in_channels, mid_channels)
        self.prompt_down = nn.Linear(prompt_channels, mid_channels)
        self.fc = nn.Linear(2 * mid_channels, in_channels)

    def forward(self, x: torch.Tensor, prompt_in: torch.Tensor):
        """
        Args:
            x: (B, C_im, H, W)
            prompt_in: (B, C_prompt)
        """
        x_gap = self.gap(x).squeeze(-1).squeeze(-1)  # (B, C_im)
        x_gap = self.conv_down(x_gap)  # (B, C_mid)
        prompt_down = self.prompt_down(prompt_in)  # (B, C_mid)
        gating = torch.cat([x_gap, prompt_down], dim=-1)  # (B, 2 * C_mid)
        gating = F.sigmoid(self.fc(F.relu(gating)))[..., None, None]  # (B, C_im, 1, 1)
        return x * gating


class ConvBlock(nn.Module):
    """
    Convolutional blocks
    """
    def __init__(self, in_channels, out_channels, stride=1, leaky=False, norm='INSTANCE'):
        super().__init__()
        self.norm_type = norm
        # activation, support PReLU and common ReLU
        self.act = nn.LeakyReLU() if leaky else nn.ReLU(inplace=False)
        # self.act = nn.ELU() if leaky else nn.ReLU(inplace=True)

        self.conv1 = ConvNorm(in_channels, out_channels, 3, stride, leaky, norm, True)
        self.conv2 = ConvNorm(out_channels, out_channels, 3, 1, leaky, norm, False)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)

        if self.norm_type != 'GROUP':
            out = self.act(out)

        return out


class ResBlock(nn.Module):
    """
    Residual blocks
    """
    def __init__(self, in_channels, out_channels, stride=1, use_dropout=False, leaky=False, norm='INSTANCE'):
        super().__init__()
        self.norm_type = norm
        self.act = nn.LeakyReLU() if leaky else nn.ReLU(inplace=False)
        self.dropout = nn.Dropout2d(p=0.1) if use_dropout else None
        # self.act = nn.ELU() if leaky else nn.ReLU(inplace=True)

        self.conv1 = ConvNorm(in_channels, out_channels, 3, stride, leaky, norm, True)
        self.conv2 = ConvNorm(out_channels, out_channels, 3, 1, leaky, norm, False)

        need_map = in_channels != out_channels or stride != 1
        self.id = ConvNorm(in_channels, out_channels, 1, stride, leaky, norm, False) if need_map else Identity()

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        identity = self.id(identity)

        out = out + identity
        if self.norm_type != 'GROUP':
            out = self.act(out)

        if self.dropout:
            out = self.dropout(out)

        return out


class ConvBottleNeck(nn.Module):
    """
    Convolutional bottleneck blocks
    """
    def __init__(self, in_channels, out_channels, stride=1, leaky=False, norm='INSTANCE'):
        super().__init__()
        self.norm_type = norm
        middle_channels = in_channels // 4
        self.act = nn.LeakyReLU() if leaky else nn.ReLU(inplace=False)
        # self.act = nn.ELU() if leaky else nn.ReLU(inplace=True)

        self.conv1 = ConvNorm(in_channels, middle_channels, 1, 1, leaky, norm, True)
        self.conv2 = ConvNorm(middle_channels, middle_channels, 3, stride, leaky, norm, True)
        self.conv3 = ConvNorm(middle_channels, out_channels, 1, 1, leaky, norm, False)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)

        if self.norm_type != 'GROUP':
            out = self.act(out)

        return out


class ResBottleneck(nn.Module):
    """
    Residual bottleneck blocks
    """
    def __init__(self, in_channels, out_channels, stride=1, use_dropout=False, leaky=False, norm='INSTANCE'):
        super().__init__()
        self.norm_type = norm
        middle_channels = in_channels // 4
        self.act = nn.LeakyReLU() if leaky else nn.ReLU(inplace=False)
        self.dropout = nn.Dropout2d(p=0.1) if use_dropout else None
        # self.act = nn.ELU() if leaky else nn.ReLU(inplace=True)

        self.conv1 = ConvNorm(in_channels, middle_channels, 1, 1, leaky, norm, True)
        self.conv2 = ConvNorm(middle_channels, middle_channels, 3, stride, leaky, norm, True)
        self.conv3 = ConvNorm(middle_channels, out_channels, 1, 1, leaky, norm, False)

        need_map = in_channels != out_channels or stride != 1
        self.id = ConvNorm(in_channels, out_channels, 1, stride, leaky, norm, False) if need_map else Identity()

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        identity = self.id(identity)

        out = out + identity
        if self.norm_type != 'GROUP':
            out = self.act(out)

        if self.dropout:
            out = self.dropout(out)

        return out


class PromptResBlock(nn.Module):
    """
    Residual blocks
    """
    def __init__(self, in_channels, out_channels, stride=1, use_dropout=False, leaky=False, norm='INSTANCE'):
        super().__init__()
        self.norm_type = norm
        self.act = nn.LeakyReLU() if leaky else nn.ReLU(inplace=False)
        self.dropout = nn.Dropout2d(p=0.1) if use_dropout else None
        # self.act = nn.ELU() if leaky else nn.ReLU(inplace=True)

        self.conv1 = ConvNorm(in_channels, out_channels, 3, stride, leaky, norm, True)
        self.conv2 = ConvNorm(out_channels, out_channels, 3, 1, leaky, norm, False)
        self.attn = PromptAttentionModule(out_channels, 512, out_channels // 4)

        need_map = in_channels != out_channels or stride != 1
        self.id = ConvNorm(in_channels, out_channels, 1, stride, leaky, norm, False) if need_map else Identity()

    def forward(self, x, prompt_in):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.attn(out, prompt_in)
        identity = self.id(identity)

        out = out + identity
        if self.norm_type != 'GROUP':
            out = self.act(out)

        if self.dropout:
            out = self.dropout(out)

        return out


class PromptResBottleneck(nn.Module):
    """
    Residual bottleneck blocks
    """
    def __init__(self, in_channels, out_channels, stride=1, use_dropout=False, leaky=False, norm='INSTANCE'):
        super().__init__()
        self.norm_type = norm
        middle_channels = in_channels // 4
        self.act = nn.LeakyReLU() if leaky else nn.ReLU(inplace=False)
        self.dropout = nn.Dropout2d(p=0.1) if use_dropout else None
        # self.act = nn.ELU() if leaky else nn.ReLU(inplace=True)

        self.conv1 = ConvNorm(in_channels, middle_channels, 1, 1, leaky, norm, True)
        self.conv2 = ConvNorm(middle_channels, middle_channels, 3, stride, leaky, norm, True)
        self.conv3 = ConvNorm(middle_channels, out_channels, 1, 1, leaky, norm, False)
        self.attn = PromptAttentionModule(out_channels, 512, out_channels // 4)

        need_map = in_channels != out_channels or stride != 1
        self.id = ConvNorm(in_channels, out_channels, 1, stride, leaky, norm, False) if need_map else Identity()

    def forward(self, x, prompt_in):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.attn(out, prompt_in)
        identity = self.id(identity)

        out = out + identity
        if self.norm_type != 'GROUP':
            out = self.act(out)

        if self.dropout:
            out = self.dropout(out)

        return out


class SobelEdge(nn.Module):
    def __init__(self, input_dim, channels, kernel_size=3, stride=1):
        super().__init__()
        conv = getattr(nn, 'Conv%dd' % input_dim)
        self.filter = conv(channels, channels, kernel_size, stride, padding=(kernel_size - 1) // 2,
                           groups=channels, bias=False)
        sobel = [[1, 2, 1], [0, 0, 0], [-1, -2, -1]]
        sobel_kernel = torch.tensor(sobel, dtype=torch.float32).unsqueeze(0).expand([channels, 1] + [kernel_size] * input_dim)
        self.filter.weight = nn.Parameter(sobel_kernel, requires_grad=False)

    def forward(self, x):
        with torch.no_grad():
            out = self.filter(x)
        return out
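Shape sketch (illustrative, not part of base_modules.py): how PromptResBlock consumes a 512-dimensional text prompt embedding, following the PromptAttentionModule docstring above. The import path assumes the repository root is on PYTHONPATH; sizes are arbitrary.

import torch
from base.base_modules import PromptResBlock

block = PromptResBlock(in_channels=64, out_channels=128, stride=2, norm='INSTANCE')
x = torch.randn(2, 64, 320, 320)   # (B, C_im, H, W)
prompt = torch.randn(2, 512)       # (B, C_prompt); 512 matches the hard-coded prompt width above
out = block(x, prompt)
print(out.shape)                   # torch.Size([2, 128, 160, 160])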
base/base_segmentation.py
ADDED
@@ -0,0 +1,230 @@
import os
import numpy as np
import torch
import torch.distributed as dist
from torch.cuda.amp import GradScaler
from abc import ABC, abstractmethod
from utils.iteration.iterator import MetricMeter
from utils.ddp_utils import gather_object_across_processes


class BaseSegmentationModel(ABC):
    """
    This class is an abstract base class (ABC) for segmentation models.
    To create a subclass, you need to implement the following four methods:
    -- <__init__>: initialize the class.
    -- <set_input>: unpack data from dataset.
    -- <optimize_parameters>: calculate losses, gradients, and update network weights.
    -- <evaluate_one_step>: performance evaluation.
    """
    def __init__(self, cfg, num_classes, amp=False):
        # initialize training CUDA devices
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # training configuration
        self.cfg = cfg
        self.num_classes = num_classes
        self.is_mixed = amp
        self.scaler = GradScaler()
        self.start_epoch = -1

        # initialize networks, criterion, optimizer and scheduler
        self.network = None
        self.criterion = None
        self.optimizer = None
        self.scheduler = None

        # visualization
        self.visual_names = []
        self.loss_names = []

    def train(self):
        self.network.train()
        return self

    def eval(self):
        self.network.eval()
        return self

    def training(self):
        return self.network.training

    def initialize_metric_meter(self, class_list):
        self.class_list = class_list
        self.metric_meter = MetricMeter(metrics=['dice', 'hd95', 'asd'], class_names=class_list, subject_names=['name'])
        self.train_loss = MetricMeter(metrics=self.loss_names, class_names=['train'])
        self.val_loss = MetricMeter(metrics=['loss'], class_names=['val'])

    def update_loss_meter(self, print=False):
        loss_dict = {}
        for loss_name in self.loss_names:
            try:
                loss_value = float(getattr(self, loss_name))
                loss_list = gather_object_across_processes(loss_value)
                loss_value = np.mean(loss_list)
            except:
                continue
            loss_dict['train_{}'.format(loss_name)] = loss_value
        self.train_loss.update(loss_dict)
        stats = self.train_loss.report(print_stats=print, mean_only=True)
        return stats

    @abstractmethod
    def set_input(self, *args, **kwargs):
        raise NotImplementedError

    @abstractmethod
    def optimize_parameters(self, *args, **kwargs):
        raise NotImplementedError

    @abstractmethod
    def evaluate_one_step(self, *args, **kwargs):
        raise NotImplementedError

    def load_networks(self, ckpt_path, resume_training=False):
        checkpoint = torch.load(ckpt_path, map_location=self.device)
        print('Load ckpt weight: {}'.format(ckpt_path))
        self.network.load_state_dict(checkpoint['net'])
        if resume_training:
            print('Load training config for breakpoint continuation')
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.scheduler.load_state_dict(checkpoint['scheduler'])
            self.scaler.load_state_dict(checkpoint['scaler'])
            self.start_epoch = checkpoint['epoch']

    def save_networks(self, epoch_index, save_dir):
        if dist.get_rank() == 0:
            checkpoint = {
                "net": self.network.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'scheduler': self.scheduler.state_dict(),
                'scaler': self.scaler.state_dict(),
                "epoch": epoch_index
            }
            torch.save(checkpoint,
                       os.path.join(save_dir, 'Epoch_{}.pkl'.format(epoch_index + 1)))


class MultiNetworkSegmentationModel(ABC):
    """
    This class is an abstract base class (ABC) for segmentation models.
    To create a subclass, you need to implement the following four methods:
    -- <__init__>: initialize the class.
    -- <set_input>: unpack data from dataset.
    -- <optimize_parameters>: calculate losses, gradients, and update network weights.
    -- <evaluate_one_step>: performance evaluation.
    """
    def __init__(self, cfg, num_classes, amp=False):
        # initialize training CUDA devices
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # training configuration
        self.cfg = cfg
        self.num_classes = num_classes
        self.is_mixed = amp
        self.scaler = GradScaler()
        self.start_epoch = -1

        # initialize networks, criterion, optimizer and scheduler
        self.net_names = []

        # visualization
        self.visual_names = []
        self.loss_names = []

    def train(self):
        for name in self.net_names:
            net = getattr(self, name)
            net.train()
        return self

    def eval(self):
        for name in self.net_names:
            net = getattr(self, name)
            net.eval()
        return self

    def training(self):
        return getattr(self, self.net_names[0]).training

    def initialize_metric_meter(self, class_list):
        self.class_list = class_list
        self.metric_meter = MetricMeter(metrics=['dice', 'hd95', 'asd'], class_names=class_list, subject_names=['name'])
        self.train_loss = MetricMeter(metrics=self.loss_names, class_names=['train'])
        self.val_loss = MetricMeter(metrics=['loss'], class_names=['val'])

    def update_loss_meter(self, print=False):
        loss_dict = {}
        for loss_name in self.loss_names:
            try:
                loss_value = float(getattr(self, loss_name))
                loss_list = gather_object_across_processes(loss_value)
                loss_value = np.mean(loss_list)
            except:
                continue
            loss_dict['train_{}'.format(loss_name)] = loss_value
        self.train_loss.update(loss_dict)
        stats = self.train_loss.report(print_stats=print, mean_only=True)
        return stats

    @abstractmethod
    def set_input(self, *args, **kwargs):
        raise NotImplementedError

    @abstractmethod
    def optimize_parameters(self, *args, **kwargs):
        raise NotImplementedError

    @abstractmethod
    def evaluate_one_step(self, *args, **kwargs):
        raise NotImplementedError

    def load_networks(self, ckpt_path, resume_training=False, strict=True):
        checkpoint = torch.load(ckpt_path, map_location=self.device)
        print('Load ckpt weight: {}'.format(ckpt_path))
        if resume_training:
            print('Load training config for breakpoint continuation')
            self.scaler.load_state_dict(checkpoint['scaler'])
            self.start_epoch = checkpoint['epoch']
        for name in self.net_names:
            try:
                getattr(self, name).load_state_dict(checkpoint[name], strict=strict)
                if resume_training:
                    getattr(self, '{}_optimizer'.format(name)).load_state_dict(checkpoint['{}_optimizer'.format(name)])
                    getattr(self, '{}_scheduler'.format(name)).load_state_dict(checkpoint['{}_scheduler'.format(name)])
            except:
                print('Failed to load network: {}'.format(name))

    def load_single_network(self, ckpt_path, net_name, resume_training=False, strict=True):
        checkpoint = torch.load(ckpt_path, map_location=self.device)
        print('Load ckpt weight: {}'.format(ckpt_path))
        if resume_training:
            print('Load training config for breakpoint continuation')
            self.scaler.load_state_dict(checkpoint['scaler'])
            self.start_epoch = checkpoint['epoch']
        getattr(self, net_name).load_state_dict(checkpoint[net_name], strict=strict)
        if resume_training:
            getattr(self, '{}_optimizer'.format(net_name)).load_state_dict(checkpoint['{}_optimizer'.format(net_name)])
            getattr(self, '{}_scheduler'.format(net_name)).load_state_dict(checkpoint['{}_scheduler'.format(net_name)])

    def save_networks(self, epoch_index, save_dir):
        if dist.get_rank() == 0:
            checkpoint = {}
            for name in self.net_names:
                checkpoint[name] = getattr(self, name).state_dict()
                checkpoint['{}_optimizer'.format(name)] = getattr(self, '{}_optimizer'.format(name)).state_dict()
                checkpoint['{}_scheduler'.format(name)] = getattr(self, '{}_scheduler'.format(name)).state_dict()
            checkpoint['scaler'] = self.scaler.state_dict()
            checkpoint['epoch'] = epoch_index
            torch.save(checkpoint, os.path.join(save_dir, 'Epoch_{}.pkl'.format(epoch_index)))

    def save_best_networks(self, epoch_index, save_dir):
        if dist.get_rank() == 0:
            checkpoint = {}
            for name in self.net_names:
                checkpoint[name] = getattr(self, name).state_dict()
                checkpoint['{}_optimizer'.format(name)] = getattr(self, '{}_optimizer'.format(name)).state_dict()
                checkpoint['{}_scheduler'.format(name)] = getattr(self, '{}_scheduler'.format(name)).state_dict()
            checkpoint['scaler'] = self.scaler.state_dict()
            checkpoint['epoch'] = epoch_index
            torch.save(checkpoint, os.path.join(save_dir, 'Epoch_best.pkl'))
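Minimal subclass sketch (illustrative only): the docstring above says a concrete model implements __init__, set_input, optimize_parameters and evaluate_one_step. The network, loss, optimizer and batch keys below are placeholders, not the repository's actual models.

import torch
import torch.nn as nn
from base.base_segmentation import BaseSegmentationModel

class ToySegModel(BaseSegmentationModel):
    def __init__(self, cfg, num_classes, amp=False):
        super().__init__(cfg, num_classes, amp)
        self.network = nn.Conv2d(1, num_classes, 3, padding=1).to(self.device)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.network.parameters(), lr=1e-4)
        self.loss_names = ['seg_loss']

    def set_input(self, batch):
        # unpack one batch from the dataloader (keys are assumptions)
        self.image = batch['image'].to(self.device)
        self.label = batch['label'].to(self.device)

    def optimize_parameters(self):
        self.optimizer.zero_grad()
        pred = self.network(self.image)
        self.seg_loss = self.criterion(pred, self.label)
        self.seg_loss.backward()
        self.optimizer.step()

    def evaluate_one_step(self):
        with torch.no_grad():
            return self.network(self.image).argmax(dim=1)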
base/base_wandb_model.py
ADDED
@@ -0,0 +1,107 @@
import wandb
import torch
import numpy as np
from monai.visualize import blend_images


class WandBModel:
    """
    Enable WandB features to the model using multiple inheritance
    """
    def __init__(self, *args, **kwargs):
        # the following attributes should be initialized by class `BaseSegmentationModel`
        self.visual_pairs = None
        self.train_loss = None
        self.val_loss = None
        self.metric_meter = None
        self.name = None
        # the following attributes should be initialized by the child class
        self.val_table = None

    def volume2videos(self, time_dim=3, tag=''):
        """
        Convert 3D volumes to video in favor of WandB logging
        Args:
            time_dim: the spatial dimension to be converted as the time dimension, default is the axial axis (dim 3)
            tag: extra information for logging
        """
        videos = []
        for image_pair in self.visual_pairs:
            try:
                pair_name = getattr(self, image_pair['name'])
                image = getattr(self, image_pair['image'])
                mask = getattr(self, image_pair['mask'])
                vis_type = image_pair['type']
            except:
                continue
            for i in range(image.shape[0]):  # deallocate the batch dim
                image2save = image[i, ...]
                mask2save = mask[i, ...]
                item_name = pair_name[i]
                # detach the tensor, format [C, H, W, D]
                image_numpy = image2save.detach()
                mask_numpy = mask2save.detach()
                if mask_numpy.shape[0] > 1:
                    mask_numpy = torch.argmax(mask_numpy, dim=0, keepdim=True)
                # (C, H, W, D), torch.Tensor on device
                pair_blend = blend_images(image_numpy, mask_numpy, alpha=0.5) * 255
                # permute the axes to (time, channel, height, width)
                spatial_dim = list(range(1, len(pair_blend.shape[1:]) + 1))
                spatial_dim.remove(time_dim)
                pair_blend = pair_blend.permute([time_dim, 0] + spatial_dim).cpu().numpy().astype(np.uint8)
                # record in the wandb.Video class
                video = wandb.Video(pair_blend, fps=8, caption='{}_{}{}'.format(item_name, vis_type, tag))
                videos.append(video)
        return videos

    def log_scaler(self, key, value, step=None):
        """
        Log manually defined scaler data
        """
        wandb.log({key: np.round(value, decimals=4)}, step=step)

    def log_train_loss(self, step=None):
        """
        Log train loss
        """
        data_dict = self.train_loss.pop_data(True)
        for key, value in data_dict.items():
            wandb.log({'train/{}'.format(key): value}, step=step)

    def log_val_loss(self, step=None):
        """
        Log val loss
        """
        data_dict = self.val_loss.pop_data(True)
        for key, value in data_dict.items():
            wandb.log({'val/{}'.format(key): value}, step=step)

    def log_metrics(self, step=None):
        """
        Log validation metrics as wandb.Table
        """
        df = self.metric_meter.to_df()
        wandb.log({'val/metrics': wandb.Table(dataframe=df)}, step=step)

    def log_vis(self, key, step=None, time_dim=3, tag=''):
        """
        Log training intermediate visualizations
        """
        videos = self.volume2videos(time_dim, tag)
        wandb.log({key: videos}, step=step)

    def update_val_visualization(self, time_dim=3, tag=''):
        """
        Update the validation visualization to buffer, called every step of evaluation
        """
        videos = self.volume2videos(time_dim, tag)
        self.val_table.add_data(self.name, *videos)

    def log_val_visualization(self, step=None):
        """
        Log validation visualization
        """
        wandb.log({'val/visualization': self.val_table}, step=step)
        # re-initialize the table for next logging
        del self.val_table
        self.val_table = wandb.Table(columns=['ID'] + [pair['type'] for pair in self.visual_pairs])
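Composition sketch (illustrative): as its docstring notes, WandBModel is meant to be mixed into a model class via multiple inheritance rather than used on its own. The class name and constructor wiring below are assumptions; a real subclass would also implement the abstract methods of the base class and populate self.visual_pairs and self.val_table.

from base.base_segmentation import MultiNetworkSegmentationModel
from base.base_wandb_model import WandBModel

class TranslationModelWithLogging(MultiNetworkSegmentationModel, WandBModel):
    def __init__(self, cfg, num_classes, amp=False):
        MultiNetworkSegmentationModel.__init__(self, cfg, num_classes, amp)
        WandBModel.__init__(self)
        # networks, losses, self.visual_pairs and self.val_table would be set up here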
checkpoint/stage_i.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aaad39f42a0f916ba44b39071dcfbf1145ee43f6f5a269e3f4364b81d361d794
size 494807162
checkpoint/stage_ii.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:155209bbe587905366b100cf2e8fadc9e8b9c672a0920eb848fcb80a3fcd5e8c
size 425297586
ckpt/BiomedCLIP/biomed-vlp-eval.svg
ADDED
ckpt/BiomedCLIP/biomed_clip_example.ipynb
ADDED
The diff for this file is too large to render.
ckpt/BiomedCLIP/config.json
ADDED
@@ -0,0 +1,17 @@
{
  "architectures": [
    "BertForMaskedLM"
  ],
  "model_type": "bert",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}
ckpt/BiomedCLIP/open_clip_config.json
ADDED
@@ -0,0 +1,31 @@
{
  "model_cfg": {
    "embed_dim": 512,
    "vision_cfg": {
      "timm_model_name": "vit_base_patch16_224",
      "timm_model_pretrained": false,
      "timm_pool": "",
      "timm_proj": "linear",
      "image_size": 224
    },
    "text_cfg": {
      "hf_model_name": "./ckpt/BiomedCLIP/",
      "hf_tokenizer_name": "./ckpt/BiomedCLIP/",
      "hf_proj_type": "mlp",
      "hf_pooler_type": "cls_last_hidden_state_pooler",
      "context_length": 77
    }
  },
  "preprocess_cfg": {
    "mean": [
      0.48145466,
      0.4578275,
      0.40821073
    ],
    "std": [
      0.26862954,
      0.26130258,
      0.27577711
    ]
  }
}
ckpt/BiomedCLIP/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
ckpt/BiomedCLIP/tokenizer.json
ADDED
The diff for this file is too large to render.
ckpt/BiomedCLIP/tokenizer_config.json
ADDED
@@ -0,0 +1,15 @@
{
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "model_max_length": 1000000000000000019884624838656,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
ckpt/BiomedCLIP/vocab.txt
ADDED
The diff for this file is too large to render.
configs/confocal.cfg
ADDED
@@ -0,0 +1,36 @@
MODEL:
  IMG_SIZE: 1024
  CROP_SIZE: 320
  CONFOCAL_IN: 3  # 3-channel microscope file
  CONFOCAL_OUT: 1  # nuclei
  IMC_IN: 4  # 3-channel microscope file + 1 channel confocal
  IMC_OUT: 1  # nuclei
  GRAD_CKPT: True
  TIMM_MODEL: none
  NORM: INSTANCE
  CONFOCAL_PATH: none
  IMC_PATH: none
TRAIN:
  LR_G: 0.0002
  LR_D: 0.0002
  DECAY: 0.0
  BETA1: 0.5
  EARLY_STAGE: 0
  BURN_IN: 0
  BURN: 500
  RAMPUP: 1000
  EPOCHS: 1000
  BATCHSIZE: 16
  CROP_SAMPLE_NUM: 16
  RATIO: 0.2
  SEED: 42
  PERTURB_PROB: 0.1
  IMC_RATIO: 100.0
  CON_RATIO: 100.0
  SIM_RATIO: 50.0
  EDGE_RATIO: 100.0
  ADV_RATIO: 1.0
  CLR_RATIO: 0.0
  FREQ_RATIO: 0.00001
TEST:
  BATCHSIZE: 32
configs/confocal_marker.cfg
ADDED
@@ -0,0 +1,26 @@
MODEL:
  IMG_SIZE: 1024
  CROP_SIZE: 320
  IMC_IN: 1
  IMC_OUT: 1
  GRAD_CKPT: True
  PRETRAIN: none
TRAIN:
  LR_G: 0.002
  LR_D: 0.002
  DECAY: 0.0
  BETA1: 0.5
  EARLY_STAGE: 0
  BURN_IN: 0
  BURN: 100
  RAMPUP: 100
  EPOCHS: 100
  BATCHSIZE: 8
  CROP_SAMPLE_NUM: 8
  RATIO: 0.2
  SEED: 42
  IMC_RATIO: 100.0
  EDGE_RATIO: 10.0
  ADV_RATIO: 1.0
TEST:
  BATCHSIZE: 16
configs/convertion.cfg
ADDED
@@ -0,0 +1,25 @@
MODEL:
  IMG_SIZE: 1024
  CROP_SIZE: 320
  IMC_IN: 1
  IMC_OUT: 1
  GRAD_CKPT: True
TRAIN:
  LR_G: 0.002
  LR_D: 0.002
  DECAY: 0.0
  BETA1: 0.5
  EARLY_STAGE: 0
  BURN_IN: 0
  BURN: 100
  RAMPUP: 100
  EPOCHS: 100
  BATCHSIZE: 16
  CROP_SAMPLE_NUM: 8
  RATIO: 0.2
  SEED: 42
  IMC_RATIO: 100.0
  EDGE_RATIO: 10.0
  ADV_RATIO: 1.0
TEST:
  BATCHSIZE: 64
configs/extend_1.cfg
ADDED
@@ -0,0 +1,36 @@
MODEL:
  IMG_SIZE: 1024
  CROP_SIZE: 320
  CONFOCAL_IN: 3  # 3-channel microscope file
  CONFOCAL_OUT: 1  # nuclei
  IMC_IN: 4  # 3-channel microscope file + 1 channel confocal
  IMC_OUT: 1  # nuclei
  GRAD_CKPT: True
  TIMM_MODEL: none
  NORM: INSTANCE
  CONFOCAL_PATH: none
  IMC_PATH: none
TRAIN:
  LR_G: 0.0002
  LR_D: 0.0002
  DECAY: 0.0
  BETA1: 0.5
  EARLY_STAGE: 0
  BURN_IN: 0
  BURN: 500
  RAMPUP: 1000
  EPOCHS: 1000
  BATCHSIZE: 16
  CROP_SAMPLE_NUM: 16
  RATIO: 0.2
  SEED: 42
  PERTURB_PROB: 0.1
  IMC_RATIO: 100.0
  CON_RATIO: 100.0
  SIM_RATIO: 50.0
  EDGE_RATIO: 100.0
  ADV_RATIO: 1.0
  CLR_RATIO: 0.0
  FREQ_RATIO: 0.00001
TEST:
  BATCHSIZE: 32
configs/extend_2.cfg
ADDED
@@ -0,0 +1,26 @@
MODEL:
  IMG_SIZE: 1024
  CROP_SIZE: 320
  IMC_IN: 1
  IMC_OUT: 1
  GRAD_CKPT: True
  PRETRAIN: /mnt/shared_storage/zhaoxiangyu/experiments/IMC_translation_v2/checkpoints/convertion/convertion_0918-task_convertion-ratio_0.2/Epoch_39.pkl
TRAIN:
  LR_G: 0.002
  LR_D: 0.002
  DECAY: 0.0
  BETA1: 0.5
  EARLY_STAGE: 0
  BURN_IN: 0
  BURN: 100
  RAMPUP: 100
  EPOCHS: 100
  BATCHSIZE: 8
  CROP_SAMPLE_NUM: 8
  RATIO: 0.2
  SEED: 42
  IMC_RATIO: 100.0
  EDGE_RATIO: 10.0
  ADV_RATIO: 1.0
TEST:
  BATCHSIZE: 16
configs/full.cfg
ADDED
@@ -0,0 +1,39 @@
MODEL:
  IMG_SIZE: 1024
  CROP_SIZE: 320
  CONFOCAL_IN: 3  # 3-channel microscope file
  CONFOCAL_OUT: 1  # nuclei
  IMC_IN: 4  # 3-channel microscope file + 1 channel confocal
  IMC_OUT: 1  # nuclei
  CONVERTION_IN: 1
  CONVERTION_OUT: 1
  GRAD_CKPT: True
  TIMM_MODEL: none
  NORM: INSTANCE
  CONFOCAL_PATH: none
  IMC_PATH: none
  CONVERTION_PATH: /mnt/shared_storage/zhaoxiangyu/experiments/IMC_translation_v2/checkpoints/convertion/convertion_0918-task_convertion-ratio_0.2/Epoch_39.pkl
TRAIN:
  LR_G: 0.0002
  LR_D: 0.0002
  DECAY: 0.0
  BETA1: 0.5
  EARLY_STAGE: 0
  BURN_IN: 0
  BURN: 500
  RAMPUP: 1000
  EPOCHS: 1000
  BATCHSIZE: 16
  CROP_SAMPLE_NUM: 16
  RATIO: 0.2
  SEED: 42
  PERTURB_PROB: 0.1
  IMC_RATIO: 100.0
  CON_RATIO: 100.0
  SIM_RATIO: 50.0
  EDGE_RATIO: 100.0
  ADV_RATIO: 1.0
  CLR_RATIO: 0.0
  FREQ_RATIO: 0.00001
TEST:
  BATCHSIZE: 32
configs/imc.cfg
ADDED
@@ -0,0 +1,36 @@
MODEL:
  IMG_SIZE: 1024
  CROP_SIZE: 320
  CONFOCAL_IN: 3  # 3-channel microscope file
  CONFOCAL_OUT: 1  # nuclei
  IMC_IN: 4  # 3-channel microscope file + 1 channel confocal
  IMC_OUT: 1  # nuclei
  GRAD_CKPT: True
  TIMM_MODEL: none
  NORM: INSTANCE
  CONFOCAL_PATH: none
  IMC_PATH: none
TRAIN:
  LR_G: 0.0002
  LR_D: 0.0002
  DECAY: 0.0
  BETA1: 0.5
  EARLY_STAGE: 0
  BURN_IN: 0
  BURN: 500
  RAMPUP: 1000
  EPOCHS: 1000
  BATCHSIZE: 16
  CROP_SAMPLE_NUM: 16
  RATIO: 0.2
  SEED: 42
  PERTURB_PROB: 0.1
  IMC_RATIO: 100.0
  CON_RATIO: 100.0
  SIM_RATIO: 50.0
  EDGE_RATIO: 100.0
  ADV_RATIO: 1.0
  CLR_RATIO: 0.0
  FREQ_RATIO: 0.00001
TEST:
  BATCHSIZE: 32
configs/translation.cfg
ADDED
@@ -0,0 +1,36 @@
MODEL:
  IMG_SIZE: 1024
  CROP_SIZE: 320
  CONFOCAL_IN: 3  # 3-channel microscope file
  CONFOCAL_OUT: 1  # nuclei
  IMC_IN: 4  # 3-channel microscope file + 1 channel confocal
  IMC_OUT: 1  # nuclei
  GRAD_CKPT: True
  TIMM_MODEL: none
  NORM: INSTANCE
  CONFOCAL_PATH: none
  IMC_PATH: none
TRAIN:
  LR_G: 0.0002
  LR_D: 0.0002
  DECAY: 0.0
  BETA1: 0.5
  EARLY_STAGE: 0
  BURN_IN: 0
  BURN: 500
  RAMPUP: 1000
  EPOCHS: 1000
  BATCHSIZE: 16
  CROP_SAMPLE_NUM: 16
  RATIO: 0.2
  SEED: 42
  PERTURB_PROB: 0.1
  IMC_RATIO: 100.0
  CON_RATIO: 100.0
  SIM_RATIO: 50.0
  EDGE_RATIO: 100.0
  ADV_RATIO: 1.0
  CLR_RATIO: 0.0
  FREQ_RATIO: 0.00001
TEST:
  BATCHSIZE: 32
markers.py
ADDED
@@ -0,0 +1,136 @@
breast_markers = ['HER2', 'TAPAN8', 'CD15', 'CD206', 'CD11b', 'HLA_DR', 'H3', 'CD8a', 'ISG15', 'CD14',
                  'ZC3HV1', 'Collagen1', 'CD4', 'CD66b', 'ALDH1', 'FOXP3', 'SMA', 'CD24', 'CD44', 'CD54',
                  'PPARG', 'CD31', 'PD1', 'CD19', 'CD69', 'PKCD', 'Ki67', 'ER', 'CD11c', 'CD27',
                  'LPS', 'CD11a', 'PR', 'CD3', 'CD68', 'CD83', 'LTA', 'IFI6', 'CD45', 'CDH1', 'CD62L']

pancreatic_markers = ['PGAM1', 'CD44', 'Amy2A', 'PGK1', 'PGAM5', 'CD99', 'CoL1', 'TALDO', 'ALDOB', 'ALDO',
                      'HK2', 'HK3', 'TPI', 'PKM', 'LDH', 'CK7', 'PDPN', 'HK1', 'NSE', 'AMF',
                      'PFKM', 'CD45', 'PGAM4', 'GAPDH', 'CD31', 'ECAD', 'PGAM2', 'aSMA', 'LDHB']

prostatic_markers = ['CXCR4', 'EGFR', 'LAG-3', 'CD278', 'PSMA', 'CD15', 'CD134', 'CTLA4', 'Nestin', 'CD16',
                     'CD56', 'PD-1', 'CD11b', 'CD66a', 'CXCL12', 'CCR7', 'IDO', 'CD73', 'CD33', 'VEGF',
                     'CD8a', 'aSMA', 'CD14', 'AMACR', 'CD20', 'Ki-67', 'CD4', 'SOX-9', 'B7-H4', 'CD11C',
                     'IFNgamma', 'CD25', 'Pan-Keratin', 'Pan-Actin', 'CD45AR', 'CD74', 'CD276', 'HLA-DR', 'CD31', 'CD45RO',
                     'TGFbeta', 'CD366', 'CD19', 'PSA', 'Foxp3', 'EpCAM', 'GranzymeB', 'BCL-2', 'ARG1', 'CD27',
                     'hFAP', 'PDL-2', 'Keratin8', 'PDL-1', 'CD127', 'CD304', 'CD3', 'CD68', 'AR', 'CD45',
                     'Vista', 'CD62L', 'CD163', 'pan-actin']
models/modules/biomedclip.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import torch.nn as nn
|
3 |
+
from open_clip.factory import *
|
4 |
+
|
5 |
+
|
6 |
+
# def create_model_and_transforms(
|
7 |
+
# model_name: str,
|
8 |
+
# config: str,
|
9 |
+
#         device: Union[str, torch.device] = 'cpu',
#         cache_dir: Optional[str] = None,
#         force_preprocess_cfg: Optional[Dict[str, Any]] = None,
# ):
#     force_preprocess_cfg = force_preprocess_cfg or {}
#     preprocess_cfg = asdict(PreprocessCfg())
#     with open(config, 'r') as f:
#         config = json.load(f)

#     checkpoint_path = os.path.join(cache_dir, 'open_clip_pytorch_model.bin')
#     preprocess_cfg = merge_preprocess_dict(preprocess_cfg, config['preprocess_cfg'])
#     model_cfg = config['model_cfg']

#     if isinstance(device, str):
#         device = torch.device(device)
#     print(f'Loaded {model_name} model config.')

#     # load pretrained weights for HF text model IFF no CLIP weights being loaded
#     model_cfg['text_cfg']['hf_model_pretrained'] = False

#     model = CustomTextCLIP(**model_cfg)
#     model.to(device=device)

#     print(f'Loading pretrained {model_name} weights ({checkpoint_path}).')
#     load_checkpoint(model, checkpoint_path)

#     # set image preprocessing configuration in model attributes for convenience
#     if getattr(model.visual, 'image_size', None) is not None:
#         # use image_size set on model creation (via config or force_image_size arg)
#         force_preprocess_cfg['size'] = model.visual.image_size
#     set_model_preprocess_cfg(model, merge_preprocess_dict(preprocess_cfg, force_preprocess_cfg))

#     pp_cfg = PreprocessCfg(**model.visual.preprocess_cfg)

#     preprocess_train = image_transform_v2(
#         pp_cfg,
#         is_train=True,
#         aug_cfg=None,
#     )
#     preprocess_val = image_transform_v2(
#         pp_cfg,
#         is_train=False,
#     )

#     return model, preprocess_train, preprocess_val


def get_my_tokenizer(
        config: str,
        context_length: Optional[int] = None,
        **kwargs,
):
    with open(config, 'r') as f:
        config = json.load(f)

    text_config = config['model_cfg']['text_cfg']
    if 'tokenizer_kwargs' in text_config:
        tokenizer_kwargs = dict(text_config['tokenizer_kwargs'], **kwargs)
    else:
        tokenizer_kwargs = kwargs

    if context_length is None:
        context_length = text_config.get('context_length', DEFAULT_CONTEXT_LENGTH)

    if 'hf_tokenizer_name' in text_config:
        tokenizer = HFTokenizer(
            text_config['hf_tokenizer_name'],
            context_length=context_length,
            **tokenizer_kwargs,
        )
    else:
        tokenizer = SimpleTokenizer(
            context_length=context_length,
            **tokenizer_kwargs,
        )

    return tokenizer


class BiomedCLIPTextEncoder(nn.Module):
    def __init__(self, device: torch.device) -> None:
        super().__init__()
        # self.model, _, _ = create_model_and_transforms(
        #     model_name='hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224',
        #     # config='./ckpt/BiomedCLIP/open_clip_config.json',
        #     cache_dir='./ckpt/BiomedCLIP/'
        # )
        self.model, _, _ = create_model_and_transforms('hf-hub:hsiangyualex/biomedclip4imc')
        self.model.eval()
        self.model.to(device)
        for param in self.model.parameters():
            param.requires_grad = False
        # self.tokenizer = get_my_tokenizer(config='./ckpt/BiomedCLIP/open_clip_config.json')
        self.tokenizer = get_tokenizer('hf-hub:hsiangyualex/biomedclip4imc')
        self.device = device

    @torch.no_grad()
    def forward(self, prompts):
        """
        Args:
            prompts: a series of protein names
        """
        prompts = [f"An imaging mass cytometry staining image of {prompt} protein." for prompt in prompts]
        prompts = self.tokenizer(prompts).to(self.device)
        text_features = self.model.encode_text(prompts).detach()
        return text_features
models/modules/dct.py
ADDED
@@ -0,0 +1,305 @@
import numpy as np
import torch
import torch.nn as nn

try:
    # PyTorch 1.7.0 and newer versions
    import torch.fft

    def dct1_rfft_impl(x):
        return torch.view_as_real(torch.fft.rfft(x, dim=1))

    def dct_fft_impl(v):
        return torch.view_as_real(torch.fft.fft(v, dim=1))

    def idct_irfft_impl(V):
        return torch.fft.irfft(torch.view_as_complex(V), n=V.shape[1], dim=1)
except ImportError:
    # PyTorch 1.6.0 and older versions
    def dct1_rfft_impl(x):
        return torch.rfft(x, 1)

    def dct_fft_impl(v):
        return torch.rfft(v, 1, onesided=False)

    def idct_irfft_impl(V):
        return torch.irfft(V, 1, onesided=False)


def dct1(x):
    """
    Discrete Cosine Transform, Type I

    :param x: the input signal
    :return: the DCT-I of the signal over the last dimension
    """
    x_shape = x.shape
    x = x.view(-1, x_shape[-1])
    x = torch.cat([x, x.flip([1])[:, 1:-1]], dim=1)

    return dct1_rfft_impl(x)[:, :, 0].view(*x_shape)


def idct1(X):
    """
    The inverse of DCT-I, which is just a scaled DCT-I

    Our definition of idct1 is such that idct1(dct1(x)) == x

    :param X: the input signal
    :return: the inverse DCT-I of the signal over the last dimension
    """
    n = X.shape[-1]
    return dct1(X) / (2 * (n - 1))


def dct(x, norm=None):
    """
    Discrete Cosine Transform, Type II (a.k.a. the DCT)

    For the meaning of the parameter `norm`, see:
    https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html

    :param x: the input signal
    :param norm: the normalization, None or 'ortho'
    :return: the DCT-II of the signal over the last dimension
    """
    x_shape = x.shape
    N = x_shape[-1]
    x = x.contiguous().view(-1, N)

    v = torch.cat([x[:, ::2], x[:, 1::2].flip([1])], dim=1)

    Vc = dct_fft_impl(v)

    k = - torch.arange(N, dtype=x.dtype, device=x.device)[None, :] * np.pi / (2 * N)
    W_r = torch.cos(k)
    W_i = torch.sin(k)

    V = Vc[:, :, 0] * W_r - Vc[:, :, 1] * W_i

    if norm == 'ortho':
        V[:, 0] /= np.sqrt(N) * 2
        V[:, 1:] /= np.sqrt(N / 2) * 2

    V = 2 * V.view(*x_shape)

    return V


def idct(X, norm=None):
    """
    The inverse to DCT-II, which is a scaled Discrete Cosine Transform, Type III

    Our definition of idct is that idct(dct(x)) == x

    For the meaning of the parameter `norm`, see:
    https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html

    :param X: the input signal
    :param norm: the normalization, None or 'ortho'
    :return: the inverse DCT-II of the signal over the last dimension
    """

    x_shape = X.shape
    N = x_shape[-1]

    X_v = X.contiguous().view(-1, x_shape[-1]) / 2

    if norm == 'ortho':
        X_v[:, 0] *= np.sqrt(N) * 2
        X_v[:, 1:] *= np.sqrt(N / 2) * 2

    k = torch.arange(x_shape[-1], dtype=X.dtype, device=X.device)[None, :] * np.pi / (2 * N)
    W_r = torch.cos(k)
    W_i = torch.sin(k)

    V_t_r = X_v
    V_t_i = torch.cat([X_v[:, :1] * 0, -X_v.flip([1])[:, :-1]], dim=1)

    V_r = V_t_r * W_r - V_t_i * W_i
    V_i = V_t_r * W_i + V_t_i * W_r

    V = torch.cat([V_r.unsqueeze(2), V_i.unsqueeze(2)], dim=2)

    v = idct_irfft_impl(V)
    x = v.new_zeros(v.shape)
    x[:, ::2] += v[:, :N - (N // 2)]
    x[:, 1::2] += v.flip([1])[:, :N // 2]

    return x.view(*x_shape)


def dct_2d(x, norm=None):
    """
    2-dimensional Discrete Cosine Transform, Type II (a.k.a. the DCT)

    For the meaning of the parameter `norm`, see:
    https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html

    :param x: the input signal
    :param norm: the normalization, None or 'ortho'
    :return: the DCT-II of the signal over the last 2 dimensions
    """
    X1 = dct(x, norm=norm)
    X2 = dct(X1.transpose(-1, -2), norm=norm)
    return X2.transpose(-1, -2)


def idct_2d(X, norm=None):
    """
    The inverse to 2D DCT-II, which is a scaled Discrete Cosine Transform, Type III

    Our definition of idct is that idct_2d(dct_2d(x)) == x

    For the meaning of the parameter `norm`, see:
    https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html

    :param X: the input signal
    :param norm: the normalization, None or 'ortho'
    :return: the inverse DCT-II of the signal over the last 2 dimensions
    """
    x1 = idct(X, norm=norm)
    x2 = idct(x1.transpose(-1, -2), norm=norm)
    return x2.transpose(-1, -2)


def dct_3d(x, norm=None):
    """
    3-dimensional Discrete Cosine Transform, Type II (a.k.a. the DCT)

    For the meaning of the parameter `norm`, see:
    https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html

    :param x: the input signal
    :param norm: the normalization, None or 'ortho'
    :return: the DCT-II of the signal over the last 3 dimensions
    """
    X1 = dct(x, norm=norm)
    X2 = dct(X1.transpose(-1, -2), norm=norm)
    X3 = dct(X2.transpose(-1, -3), norm=norm)
    return X3.transpose(-1, -3).transpose(-1, -2)


def idct_3d(X, norm=None):
    """
    The inverse to 3D DCT-II, which is a scaled Discrete Cosine Transform, Type III

    Our definition of idct is that idct_3d(dct_3d(x)) == x

    For the meaning of the parameter `norm`, see:
    https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html

    :param X: the input signal
    :param norm: the normalization, None or 'ortho'
    :return: the inverse DCT-II of the signal over the last 3 dimensions
    """
    x1 = idct(X, norm=norm)
    x2 = idct(x1.transpose(-1, -2), norm=norm)
    x3 = idct(x2.transpose(-1, -3), norm=norm)
    return x3.transpose(-1, -3).transpose(-1, -2)


class LinearDCT(nn.Linear):
    """Implement any DCT as a linear layer; in practice this executes around
    50x faster on GPU. Unfortunately, the DCT matrix is stored, which will
    increase memory usage.
    :param in_features: size of expected input
    :param type: which dct function in this file to use"""
    def __init__(self, in_features, type, norm=None, bias=False):
        self.type = type
        self.N = in_features
        self.norm = norm
        super(LinearDCT, self).__init__(in_features, in_features, bias=bias)

    def reset_parameters(self):
        # initialise using dct function
        I = torch.eye(self.N)
        if self.type == 'dct1':
            self.weight.data = dct1(I).data.t()
        elif self.type == 'idct1':
            self.weight.data = idct1(I).data.t()
        elif self.type == 'dct':
            self.weight.data = dct(I, norm=self.norm).data.t()
        elif self.type == 'idct':
            self.weight.data = idct(I, norm=self.norm).data.t()
        self.weight.requires_grad = False  # don't learn this!


def apply_linear_2d(x, linear_layer):
    """Can be used with a LinearDCT layer to do a 2D DCT.
    :param x: the input signal
    :param linear_layer: any PyTorch Linear layer
    :return: result of linear layer applied to last 2 dimensions
    """
    X1 = linear_layer(x)
    X2 = linear_layer(X1.transpose(-1, -2))
    return X2.transpose(-1, -2)


def apply_linear_3d(x, linear_layer):
    """Can be used with a LinearDCT layer to do a 3D DCT.
    :param x: the input signal
    :param linear_layer: any PyTorch Linear layer
    :return: result of linear layer applied to last 3 dimensions
    """
    X1 = linear_layer(x)
    X2 = linear_layer(X1.transpose(-1, -2))
    X3 = linear_layer(X2.transpose(-1, -3))
    return X3.transpose(-1, -3).transpose(-1, -2)


class DCTHelper(nn.Module):
    """
    Implement DCT operations and corresponding masking.
    """
    def __init__(self, side_length: int, norm: str = None, cutoff: float = 0.8, data_range: tuple = (-1.0, 1.0)):
        """
        Args:
            side_length: the side length of the image
            norm: the normalization, None or 'ortho'
            cutoff: the cutoff frequency ratio for low-pass filtering
        """
        super().__init__()
        self.dct = LinearDCT(side_length, 'dct')
        self.idct = LinearDCT(side_length, 'idct')
        mask = self.create_circular_mask(side_length, side_length, radius=side_length * cutoff, center=(0, 0))
        self.register_buffer('mask', torch.from_numpy(mask).float()[None, None, ...])
        self.data_range = data_range

    @staticmethod
    def create_circular_mask(h, w, center=None, radius=None):
        if center is None:  # use the middle of the image
            center = (int(w / 2), int(h / 2))
        if radius is None:  # use the smallest distance between the center and image walls
            radius = min(center[0], center[1], w - center[0], h - center[1])

        Y, X = np.ogrid[:h, :w]
        dist_from_center = np.sqrt((X - center[0]) ** 2 + (Y - center[1]) ** 2)

        mask = dist_from_center <= radius
        return mask

    def run_dct(self, x):
        return apply_linear_2d(x, self.dct)

    def run_idct(self, x):
        return apply_linear_2d(x, self.idct)

    def forward(self, x, mode: str = 'dct'):
        if mode == 'dct':
            return self.run_dct(x)
        elif mode == 'idct':
            return self.run_idct(x)
        else:
            raise ValueError(f"Invalid mode: {mode}")


if __name__ == '__main__':
    x = torch.Tensor(1000, 4096)
    x.normal_(0, 1)
    linear_dct = LinearDCT(4096, 'dct')
    error = torch.abs(dct(x) - linear_dct(x))
    assert error.max() < 1e-3, (error, error.max())
    linear_idct = LinearDCT(4096, 'idct')
    error = torch.abs(idct(x) - linear_idct(x))
    assert error.max() < 1e-3, (error, error.max())
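
A short, hedged example (not part of the uploaded file) showing how the 2D DCT utilities above round-trip an image-sized tensor, and that the LinearDCT matrix form matches the FFT-based transform; the import path assumes the repository root is on PYTHONPATH.

# hedged usage sketch for the DCT utilities
import torch
from models.modules.dct import dct_2d, idct_2d, LinearDCT, apply_linear_2d

x = torch.randn(2, 1, 64, 64)                       # dummy image batch
rec = idct_2d(dct_2d(x, norm='ortho'), norm='ortho')
assert torch.abs(x - rec).max() < 1e-3              # DCT-II / DCT-III round trip

lin = LinearDCT(64, 'dct', norm='ortho')            # same transform as a matrix multiply
assert torch.abs(dct_2d(x, norm='ortho') - apply_linear_2d(x, lin)).max() < 1e-3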
models/modules/networks.py
ADDED
@@ -0,0 +1,714 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from base.base_modules import *
from timm.models import create_model
from functools import partial


class Backbone(nn.Module):
    """
    Model backbone to extract features
    """
    def __init__(self,
                 input_channels: int = 3,
                 channels: tuple = (32, 64, 128, 256, 512),
                 strides: tuple = (2, 2, 2, 2),
                 use_dropout: bool = False,
                 norm: str = 'BATCH',
                 leaky: bool = True):
        """
        Args:
            input_channels: the number of input channels
            channels: length-5 tuple, define the number of channels in each stage
            strides: tuple, define the stride in each stage
            use_dropout: bool, whether to use dropout
            norm: str, normalization type
            leaky: bool, whether to use leaky relu
        """
        super().__init__()
        self.nb_filter = channels
        self.strides = strides + (5 - len(strides)) * (1,)
        res_unit = ResBlock if channels[-1] <= 320 else ResBottleneck

        self.conv0_0 = nn.Sequential(
            nn.Conv2d(input_channels, channels[0], kernel_size=7, stride=self.strides[0], padding=3),
            nn.GroupNorm(1, channels[0]) if norm == 'GROUP' else nn.BatchNorm2d(channels[0]) if norm == 'BATCH' else nn.InstanceNorm2d(channels[0]),
            nn.LeakyReLU() if leaky else nn.ReLU(),
        )
        self.conv1_0 = res_unit(self.nb_filter[0], self.nb_filter[1], self.strides[1], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv2_0 = res_unit(self.nb_filter[1], self.nb_filter[2], self.strides[2], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv3_0 = res_unit(self.nb_filter[2], self.nb_filter[3], self.strides[3], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv4_0 = res_unit(self.nb_filter[3], self.nb_filter[4], self.strides[4], use_dropout=use_dropout, norm=norm, leaky=leaky)

    def forward(self, x):
        x0_0 = self.conv0_0(x)
        x1_0 = self.conv1_0(x0_0)
        x2_0 = self.conv2_0(x1_0)
        x3_0 = self.conv3_0(x2_0)
        x4_0 = self.conv4_0(x3_0)
        return x0_0, x1_0, x2_0, x3_0, x4_0


class TimmBackbone(nn.Module):
    """
    Timm backbone to extract features, utilizing pretrained weights
    """
    def __init__(self, model_name) -> None:
        super().__init__()
        self.backbone = create_model(model_name, pretrained=True, features_only=True)
        self.determine_nb_filters()

    def determine_nb_filters(self):
        dummy = torch.randn(1, 3, 256, 256)
        out = self.backbone(dummy)
        nb_filters = []
        for o in out:
            nb_filters.append(o.size(1))
        self.nb_filter = nb_filters

    def forward(self, inputs):
        return self.backbone(inputs)


class UNet(nn.Module):
    def __init__(self,
                 model_name: str = None,
                 in_channels: int = 1,
                 out_channels: int = None,
                 channels: tuple = (64, 128, 256, 320, 512),
                 strides: tuple = (2, 2, 2, 2, 2),
                 use_dropout: bool = False,
                 norm: str = 'INSTANCE',
                 leaky: bool = True,
                 use_dilated_bottleneck: bool = False):
        """
        Args:
            model_name: timm model name
            in_channels: the number of input channels
            out_channels: the number of output channels
            channels: length-5 tuple, define the number of channels in each stage
            strides: tuple, define the stride in each stage
            use_dropout: bool, whether to use dropout
            norm: str, normalization type
            leaky: bool, whether to use leaky relu
        """
        super().__init__()
        if model_name not in [None, 'none', 'None']:
            # use a timm backbone; this overrides the other backbone arguments
            self.backbone = TimmBackbone(model_name)
        else:
            self.backbone = Backbone(input_channels=in_channels, channels=channels, strides=strides,
                                     use_dropout=use_dropout, norm=norm, leaky=leaky)
        nb_filter = self.backbone.nb_filter
        res_unit = ResBlock if nb_filter[-1] <= 512 else ResBottleneck

        # decoder
        self.conv3_1 = res_unit(nb_filter[3] + nb_filter[4], nb_filter[3], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv2_2 = res_unit(nb_filter[2] + nb_filter[3], nb_filter[2], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv1_3 = res_unit(nb_filter[1] + nb_filter[2], nb_filter[1], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv0_4 = res_unit(nb_filter[0] + nb_filter[1], nb_filter[0], use_dropout=use_dropout, norm=norm, leaky=leaky)

        # dilated bottleneck: optional
        if use_dilated_bottleneck:
            self.dilation = nn.Sequential(
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=1, dilation=1),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=2, dilation=2),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=5, dilation=5),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=1, dilation=1),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=2, dilation=2),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=5, dilation=5),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
            )
        else:
            self.dilation = nn.Identity()

        if out_channels is not None:
            self.convds0 = nn.Conv2d(nb_filter[0], out_channels, kernel_size=1, bias=False)
        else:
            self.convds0 = None

    def upsample(self, inputs, target):
        return F.interpolate(inputs, size=target.shape[2:], mode='bilinear', align_corners=False)

    def extract_features(self, x):
        x0, x1, x2, x3, x4 = self.backbone(x)

        x4 = self.dilation(x4)

        x3_1 = self.conv3_1(torch.cat([x3, self.upsample(x4, x3)], dim=1))
        x2_2 = self.conv2_2(torch.cat([x2, self.upsample(x3_1, x2)], dim=1))
        x1_3 = self.conv1_3(torch.cat([x1, self.upsample(x2_2, x1)], dim=1))
        x0_4 = self.conv0_4(torch.cat([x0, self.upsample(x1_3, x0)], dim=1))
        return x4, x0_4

    def forward(self, x):
        size = x.shape[2:]
        x0, x1, x2, x3, x4 = self.backbone(x)

        x4 = self.dilation(x4)

        x3_1 = self.conv3_1(torch.cat([x3, self.upsample(x4, x3)], dim=1))
        x2_2 = self.conv2_2(torch.cat([x2, self.upsample(x3_1, x2)], dim=1))
        x1_3 = self.conv1_3(torch.cat([x1, self.upsample(x2_2, x1)], dim=1))
        x0_4 = self.conv0_4(torch.cat([x0, self.upsample(x1_3, x0)], dim=1))
        if self.convds0 is not None:
            x_out = self.convds0(x0_4)
            out = F.interpolate(x_out, size=size, mode='bilinear', align_corners=False)
        else:
            out = x0_4
        return out

    def freeze(self):
        # freeze the network
        for p in self.parameters():
            p.requires_grad = False

    def unfreeze(self):
        # unfreeze the network to allow parameter update
        for p in self.parameters():
            p.requires_grad = True


class PromptAttentionUNet(nn.Module):
    def __init__(self,
                 model_name: str = None,
                 in_channels: int = 1,
                 out_channels: int = None,
                 channels: tuple = (64, 128, 256, 320, 512),
                 strides: tuple = (2, 2, 2, 2, 2),
                 use_dropout: bool = False,
                 norm: str = 'INSTANCE',
                 leaky: bool = True,
                 use_dilated_bottleneck: bool = False):
        """
        Args:
            model_name: timm model name
            in_channels: the number of input channels
            out_channels: the number of output channels
            channels: length-5 tuple, define the number of channels in each stage
            strides: tuple, define the stride in each stage
            use_dropout: bool, whether to use dropout
            norm: str, normalization type
            leaky: bool, whether to use leaky relu
        """
        super().__init__()
        if model_name not in [None, 'none', 'None']:
            # use a timm backbone; this overrides the other backbone arguments
            self.backbone = TimmBackbone(model_name)
        else:
            self.backbone = Backbone(input_channels=in_channels, channels=channels, strides=strides,
                                     use_dropout=use_dropout, norm=norm, leaky=leaky)
        nb_filter = self.backbone.nb_filter
        res_unit = PromptResBlock if nb_filter[-1] <= 512 else PromptResBottleneck

        # decoder
        self.conv3_1 = res_unit(nb_filter[3] + nb_filter[4], nb_filter[3], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv2_2 = res_unit(nb_filter[2] + nb_filter[3], nb_filter[2], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv1_3 = res_unit(nb_filter[1] + nb_filter[2], nb_filter[1], use_dropout=use_dropout, norm=norm, leaky=leaky)
        self.conv0_4 = res_unit(nb_filter[0] + nb_filter[1], nb_filter[0], use_dropout=use_dropout, norm=norm, leaky=leaky)

        # dilated bottleneck: optional
        if use_dilated_bottleneck:
            self.dilation = nn.Sequential(
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=1, dilation=1),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=1, dilation=2),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=1, dilation=5),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=1, dilation=1),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=1, dilation=2),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
                nn.Conv2d(nb_filter[4], nb_filter[4], kernel_size=3, stride=1, padding=1, dilation=5),
                nn.GroupNorm(16, nb_filter[4]) if norm == 'GROUP' else nn.BatchNorm2d(nb_filter[4]) if norm == 'BATCH' else nn.InstanceNorm2d(nb_filter[4]),
                nn.LeakyReLU() if leaky else nn.ReLU(),
            )
        else:
            self.dilation = nn.Identity()

        if out_channels is not None:
            self.convds0 = nn.Conv2d(nb_filter[0], out_channels, kernel_size=1, bias=False)

    def upsample(self, inputs, target):
        return F.interpolate(inputs, size=target.shape[2:], mode='bilinear', align_corners=False)

    def extract_features(self, x):
        x0, x1, x2, x3, x4 = self.backbone(x)

        x4 = self.dilation(x4)

        x3_1 = self.conv3_1(torch.cat([x3, self.upsample(x4, x3)], dim=1))
        x2_2 = self.conv2_2(torch.cat([x2, self.upsample(x3_1, x2)], dim=1))
        x1_3 = self.conv1_3(torch.cat([x1, self.upsample(x2_2, x1)], dim=1))
        x0_4 = self.conv0_4(torch.cat([x0, self.upsample(x1_3, x0)], dim=1))
        return x4, x0_4

    def forward(self, x, prompt_in):
        size = x.shape[2:]
        x0, x1, x2, x3, x4 = self.backbone(x)

        x4 = self.dilation(x4)

        x3_1 = self.conv3_1(torch.cat([x3, self.upsample(x4, x3)], dim=1), prompt_in)
        x2_2 = self.conv2_2(torch.cat([x2, self.upsample(x3_1, x2)], dim=1), prompt_in)
        x1_3 = self.conv1_3(torch.cat([x1, self.upsample(x2_2, x1)], dim=1), prompt_in)
        x0_4 = self.conv0_4(torch.cat([x0, self.upsample(x1_3, x0)], dim=1), prompt_in)
        x_out = self.convds0(x0_4)
        out = F.interpolate(x_out, size=size, mode='bilinear', align_corners=False)
        return out

    def freeze(self):
        # freeze the network
        for p in self.parameters():
            p.requires_grad = False

    def unfreeze(self):
        # unfreeze the network to allow parameter update
        for p in self.parameters():
            p.requires_grad = True


class CLIPDrivenUNet(nn.Module):
    def __init__(self, encoding: str, model_name: str = None, in_channels: int = 1, out_channels: int = 1, channels: tuple = (32, 64, 128, 256, 512),
                 strides: tuple = (2, 2, 2, 2, 2), norm: str = 'INSTANCE', leaky: bool = True) -> None:
        super().__init__()
        self.encoding = encoding
        self.num_classes = out_channels
        self.backbone = UNet(model_name=model_name, in_channels=in_channels, out_channels=None, channels=channels,
                             strides=strides, use_dropout=False, norm=norm, leaky=leaky)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.precls_conv = nn.Sequential(
            nn.InstanceNorm2d(32),
            nn.LeakyReLU(),
            nn.Conv2d(32, 8, kernel_size=1)
        )

        self.weight_nums = [8*8, 8*8, 8*1]
        self.bias_nums = [8, 8, 1]
        self.controller = nn.Conv2d(256 + channels[-1], sum(self.weight_nums + self.bias_nums), kernel_size=1, stride=1, padding=0)
        if encoding == 'CLIP':
            self.register_buffer('protein_embedding', torch.randn(self.num_classes, 512))
            self.text_to_vision = nn.Linear(512, 256)
        elif encoding == 'RAND':
            self.register_buffer('protein_embedding', torch.randn(self.num_classes, 256))

    def parse_dynamic_params(self, params, channels, weight_nums, bias_nums):
        assert params.dim() == 2
        assert len(weight_nums) == len(bias_nums)
        assert params.size(1) == sum(weight_nums) + sum(bias_nums)

        num_insts = params.size(0)
        num_layers = len(weight_nums)

        params_splits = list(torch.split_with_sizes(
            params, weight_nums + bias_nums, dim=1
        ))

        weight_splits = params_splits[:num_layers]
        bias_splits = params_splits[num_layers:]

        for l in range(num_layers):
            if l < num_layers - 1:
                weight_splits[l] = weight_splits[l].reshape(num_insts * channels, -1, 1, 1)
                bias_splits[l] = bias_splits[l].reshape(num_insts * channels)
            else:
                weight_splits[l] = weight_splits[l].reshape(num_insts * 1, -1, 1, 1)
                bias_splits[l] = bias_splits[l].reshape(num_insts * 1)
            # print(weight_splits[l].shape, bias_splits[l].shape)

        return weight_splits, bias_splits

    def heads_forward(self, features, weights, biases, num_insts):
        n_layers = len(weights)
        x = features
        for i, (w, b) in enumerate(zip(weights, biases)):
            x = F.conv2d(
                x, w, bias=b,
                stride=1, padding=0,
                groups=num_insts
            )
            if i < n_layers - 1:
                x = F.leaky_relu(x)
        return x

    def forward(self, x_in):
        out_shape = x_in.shape[2:]
        dec4, out = self.backbone.extract_features(x_in)  # dec4: (B, channels[-1], H, W), out: (B, channels[0], H, W)

        if self.encoding == 'RAND':
            task_encoding = self.protein_embedding[..., None, None]  # (num_classes, 256, 1, 1)
        elif self.encoding == 'CLIP':
            task_encoding = F.leaky_relu(self.text_to_vision(self.protein_embedding))[..., None, None]  # (num_classes, 256, 1, 1)
        else:
            raise NotImplementedError
        x_feat = self.gap(dec4)
        b = x_feat.shape[0]
        logits_array = []
        for i in range(b):
            x_cond = torch.cat([x_feat[i].unsqueeze(0).repeat(self.num_classes, 1, 1, 1), task_encoding], 1)
            params = self.controller(x_cond)  # (num_classes, num_params, 1, 1)
            params.squeeze_(-1).squeeze_(-1)  # (num_classes, num_params)

            head_inputs = self.precls_conv(out[i].unsqueeze(0))
            head_inputs = head_inputs.repeat(self.num_classes, 1, 1, 1)  # (num_classes, 8, H, W)
            N, _, H, W = head_inputs.size()
            head_inputs = head_inputs.reshape(1, -1, H, W)
            # print(head_inputs.shape, params.shape)
            weights, biases = self.parse_dynamic_params(params, 8, self.weight_nums, self.bias_nums)

            logits = self.heads_forward(head_inputs, weights, biases, N)
            logits_array.append(logits.reshape(1, -1, H, W))

        out = torch.cat(logits_array, dim=0)
        out = F.interpolate(out, size=out_shape, mode='bilinear', align_corners=False)
        # print(out.shape)
        return out


class NLayerDiscriminator(nn.Module):
    """Defines a PatchGAN discriminator"""

    def __init__(self, input_nc, norm='INSTANCE', ndf=64, n_layers=3):
        """Construct a PatchGAN discriminator

        Parameters:
            input_nc (int)  -- the number of channels in input images
            ndf (int)       -- the number of filters in the last conv layer
            n_layers (int)  -- the number of conv layers in the discriminator
            norm_layer      -- normalization layer
        """
        super(NLayerDiscriminator, self).__init__()
        norm_layer = norm_dict[norm]
        use_bias = norm_layer == nn.InstanceNorm2d

        kw = 4
        padw = 1
        sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]
        nf_mult = 1
        nf_mult_prev = 1
        for n in range(1, n_layers):  # gradually increase the number of filters
            nf_mult_prev = nf_mult
            nf_mult = min(2 ** n, 8)
            sequence += [
                nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias),
                norm_layer(ndf * nf_mult),
                nn.LeakyReLU(0.2, True)
            ]

        nf_mult_prev = nf_mult
        nf_mult = min(2 ** n_layers, 8)
        sequence += [
            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias),
            norm_layer(ndf * nf_mult),
            nn.LeakyReLU(0.2, True)
        ]

        sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]  # output 1 channel prediction map
        self.model = nn.Sequential(*sequence)

    def forward(self, input):
        """Standard forward."""
        return self.model(input)


class PatchDiscriminator(nn.Module):
    def __init__(self, in_channels, norm_type='INSTANCE'):
        super().__init__()
        nb_filters = [32, 64, 128, 256, 512]
        strides = [2, 2, 2, 2, 2]

        self.layer1 = ConvNorm(in_channels=in_channels, out_channels=nb_filters[0], kernel_size=5, stride=strides[0], norm='NONE', leaky=True)
        self.layer2 = ConvNorm(in_channels=nb_filters[0], out_channels=nb_filters[1], kernel_size=5, stride=strides[1], norm=norm_type, leaky=True)
        self.layer3 = ConvNorm(in_channels=nb_filters[1], out_channels=nb_filters[2], kernel_size=5, stride=strides[2], norm=norm_type, leaky=True)
        self.layer4 = ConvNorm(in_channels=nb_filters[2], out_channels=nb_filters[3], kernel_size=5, stride=strides[3], norm=norm_type, leaky=True)
        self.layer5 = ConvNorm(in_channels=nb_filters[3], out_channels=nb_filters[4], kernel_size=5, stride=strides[4], norm=norm_type, leaky=True)

        self.dense_pred = ConvNorm(in_channels=nb_filters[4], out_channels=1, kernel_size=3, stride=1, norm='NONE', activation=False)

    def forward(self, inputs):
        x1 = self.layer1(inputs)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        x5 = self.layer5(x4)
        output = self.dense_pred(x5)
        output_list = [x1, x2, x3, x4, x5, output]
        return output_list


class PromptPatchDiscriminator(nn.Module):
    def __init__(self, in_channels, norm_type='INSTANCE'):
        super().__init__()
        nb_filters = [32, 64, 128, 256, 512]
        strides = [2, 2, 2, 2, 2]

        self.layer1 = ConvNorm(in_channels=in_channels, out_channels=nb_filters[0], kernel_size=5, stride=strides[0], norm='NONE', leaky=True)
        self.layer2 = ConvNorm(in_channels=nb_filters[0], out_channels=nb_filters[1], kernel_size=5, stride=strides[1], norm=norm_type, leaky=True)
        self.layer3 = ConvNorm(in_channels=nb_filters[1], out_channels=nb_filters[2], kernel_size=5, stride=strides[2], norm=norm_type, leaky=True)
        self.layer4 = ConvNorm(in_channels=nb_filters[2], out_channels=nb_filters[3], kernel_size=5, stride=strides[3], norm=norm_type, leaky=True)
        self.layer5 = ConvNorm(in_channels=nb_filters[3], out_channels=nb_filters[4], kernel_size=5, stride=strides[4], norm=norm_type, leaky=True)

        self.attn4 = PromptAttentionModule(in_channels=nb_filters[3], prompt_channels=512, mid_channels=nb_filters[3] // 4)
        self.attn5 = PromptAttentionModule(in_channels=nb_filters[4], prompt_channels=512, mid_channels=nb_filters[4] // 4)

        self.dense_pred = ConvNorm(in_channels=nb_filters[4], out_channels=1, kernel_size=3, stride=1, norm='NONE', activation=False)

    def forward(self, inputs, prompt_in):
        x1 = self.layer1(inputs)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        x4 = self.attn4(x4, prompt_in)
        x5 = self.layer5(x4)
        x5 = self.attn5(x5, prompt_in)
        output = self.dense_pred(x5)
        output_list = [x1, x2, x3, x4, x5, output]
        return output_list


class MultiScaleDiscriminator(nn.Module):
    def __init__(self, in_channels, norm='INSTANCE', num_D=3):
        super(MultiScaleDiscriminator, self).__init__()
        self.num_D = num_D
        module = PatchDiscriminator

        for i in range(num_D):
            netD = module(in_channels, norm)
            setattr(self, 'layer' + str(i), netD)

        self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False)

    def singleD_forward(self, model, input):
        return model(input)

    def forward(self, input):
        num_D = self.num_D
        result = []
        input_downsampled = input
        for i in range(num_D):
            model = getattr(self, 'layer' + str(num_D - 1 - i))
            result.append(self.singleD_forward(model, input_downsampled))
            if i != (num_D - 1):
                input_downsampled = self.downsample(input_downsampled)
        return result


class PromptMultiScaleDiscriminator(nn.Module):
    def __init__(self, in_channels, norm='INSTANCE', num_D=3):
        super(PromptMultiScaleDiscriminator, self).__init__()
        self.num_D = num_D
        module = PromptPatchDiscriminator

        for i in range(num_D):
            netD = module(in_channels, norm)
            setattr(self, 'layer' + str(i), netD)

        self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False)

    def singleD_forward(self, model, input, prompt_in):
        return model(input, prompt_in)

    def forward(self, input, prompt_in):
        num_D = self.num_D
        result = []
        input_downsampled = input
        for i in range(num_D):
            model = getattr(self, 'layer' + str(num_D - 1 - i))
            result.append(self.singleD_forward(model, input_downsampled, prompt_in))
            if i != (num_D - 1):
                input_downsampled = self.downsample(input_downsampled)
        return result


class HighResEnhancer(nn.Module):
    """
    A global-local network for high-resolution generation that enhances boundary information.
    """
    def __init__(self,
                 model_name: str = None,
                 in_channels: int = 1,
                 out_channels: int = None,
                 coarse_channels: tuple = (16, 32, 64, 128, 256),
                 channels: tuple = (32, 64, 128, 256, 512),
                 use_dropout: bool = False,
                 norm: str = 'INSTANCE',
                 leaky: bool = True,
                 use_dilated_bottleneck: bool = False):
        super().__init__()
        # define basic blocks
        self.norm = norm
        self.leaky = leaky
        norm_layer = self.get_norm_layer()
        act_layer = self.get_act_layer()
        res_unit = ResBlock if channels[-1] <= 512 else ResBottleneck

        # check input channels
        assert channels[1] == coarse_channels[2], 'The number of channel-2 for coarse and number of channel-1 for fine branch should be the same.'

        # downsample and edge information extraction:
        # the downsample operation provides the input for the coarse branch
        self.downsample = nn.AvgPool2d(3, stride=2, padding=1)
        # the Sobel filter extracts edge information from the input image
        self.sobel = SobelEdge(input_dim=2, channels=in_channels)
        self.sobel_conv = nn.Sequential(
            nn.Conv2d(in_channels, channels[0], kernel_size=3, stride=2, padding=1),
            norm_layer(channels[0]),
            act_layer()
        )

        # coarse generator: in_channels -> coarse_channels[2]
        # input stride: 0
        # output stride: 4 (as input is already 2x downsampled)
        self.coarse = nn.Sequential(
            nn.Conv2d(in_channels, coarse_channels[0], kernel_size=3, stride=2, padding=1),
            norm_layer(coarse_channels[0]),
            act_layer(),
            res_unit(coarse_channels[0], coarse_channels[1], stride=2),
            res_unit(coarse_channels[1], coarse_channels[2], stride=2),
            res_unit(coarse_channels[2], coarse_channels[3], stride=2),
            res_unit(coarse_channels[3], coarse_channels[4], stride=1),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            res_unit(coarse_channels[4], coarse_channels[3], stride=1),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            res_unit(coarse_channels[3], coarse_channels[2], stride=1),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            res_unit(coarse_channels[2], coarse_channels[2], stride=1),
        )

        # fine generator: used to enhance the generation for better details
        # 1. simple encoder: channels[0] -> channels[1]
        # input stride: 0
        # output stride: 4
        self.fine_encoder = nn.Sequential(
            nn.Conv2d(in_channels, channels[0], kernel_size=3, stride=2, padding=1),
            norm_layer(channels[0]),
            act_layer(),
            nn.Conv2d(channels[0], channels[1], kernel_size=3, stride=2, padding=1),
            norm_layer(channels[1]),
            act_layer()
        )
        # 2. bottleneck: channels[1] -> channels[4]
        # input stride: 4
        # output stride: 16
        self.bottleneck = nn.Sequential(
            res_unit(channels[1], channels[2], stride=2),
            res_unit(channels[2], channels[3], stride=2),
            res_unit(channels[3], channels[4], stride=1),
            res_unit(channels[4], channels[4], stride=1),
        )
        if use_dilated_bottleneck:
            self.bottleneck.add_module('dilated_block_1',
                                       nn.Sequential(
                                           nn.Conv2d(channels[4], channels[4], kernel_size=3, stride=1, padding=1, dilation=1),
                                           norm_layer(channels[4]),
                                           act_layer()
                                       ))
            self.bottleneck.add_module('dilated_block_2',
                                       nn.Sequential(
                                           nn.Conv2d(channels[4], channels[4], kernel_size=3, stride=1, padding=2, dilation=2),
                                           norm_layer(channels[4]),
                                           act_layer()
                                       ))
            self.bottleneck.add_module('dilated_block_3',
                                       nn.Sequential(
                                           nn.Conv2d(channels[4], channels[4], kernel_size=3, stride=1, padding=5, dilation=5),
                                           norm_layer(channels[4]),
                                           act_layer()
                                       ))
            self.bottleneck.add_module('dilated_block_4',
                                       nn.Sequential(
                                           nn.Conv2d(channels[4], channels[4], kernel_size=3, stride=1, padding=1, dilation=1),
                                           norm_layer(channels[4]),
                                           act_layer()
                                       ))
            self.bottleneck.add_module('dilated_block_5',
                                       nn.Sequential(
                                           nn.Conv2d(channels[4], channels[4], kernel_size=3, stride=1, padding=2, dilation=2),
                                           norm_layer(channels[4]),
                                           act_layer()
                                       ))
            self.bottleneck.add_module('dilated_block_6',
                                       nn.Sequential(
                                           nn.Conv2d(channels[4], channels[4], kernel_size=3, stride=1, padding=5, dilation=5),
                                           norm_layer(channels[4]),
                                           act_layer()
                                       ))

        # 3. simple decoder: channels[4] -> channels[0]
        # input stride: 16
        # output stride: 2
        self.decoder = nn.Sequential(
            res_unit(channels[4], channels[3], stride=1),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            res_unit(channels[3], channels[2], stride=1),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            res_unit(channels[2], channels[1], stride=1),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            res_unit(channels[1], channels[0], stride=1),
        )

        # output operation that combines both feature branch and edge branch
        # input stride: 2
        # output stride: 0
        self.output = nn.Sequential(
            nn.Conv2d(2 * channels[0], channels[0], kernel_size=3, stride=1, padding=1),
            norm_layer(channels[0]),
            act_layer(),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.Conv2d(channels[0], out_channels, kernel_size=1, stride=1, bias=False)
        )

    def get_norm_layer(self):
        if self.norm == 'INSTANCE':
            return partial(nn.InstanceNorm2d, affine=False)
        elif self.norm == 'BATCH':
            return partial(nn.BatchNorm2d, affine=True, track_running_stats=True)
        elif self.norm == 'GROUP':
            return partial(nn.GroupNorm, num_groups=8)
        else:
            raise NotImplementedError(f'Normalization layer {self.norm} is not implemented.')

    def get_act_layer(self):
        if self.leaky:
            return partial(nn.LeakyReLU, inplace=False)
        else:
            return partial(nn.ReLU, inplace=False)

    def forward(self, inputs):
        """
        Args:
            inputs: (B, C, H, W), input IMC image
        """
        # downsample and edge information extraction
        downsampled = self.downsample(inputs)  # 0 -> 2x stride
        edge = self.sobel(inputs)
        edge = self.sobel_conv(edge)

        # coarse generator
        coarse = self.coarse(downsampled)  # 2x stride -> 4x stride
        # fine generator
        fine = self.fine_encoder(inputs)  # 0x stride -> 4x stride
        # add coarse and fine information together
        fine = self.bottleneck(fine + coarse)  # 4x stride -> 16x stride
        fine = self.decoder(fine)  # 16x stride -> 2x stride
        # output operation
        output = self.output(torch.cat([edge, fine], dim=1))
        return output
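
A hedged usage sketch (not part of the uploaded file) of the prompt-conditioned generator above. It assumes the PromptResBlock units from base.base_modules accept a (B, 512) prompt tensor, matching the 512-d prompt_channels used by PromptAttentionModule in the discriminators; shapes and the dummy prompt are placeholders.

# hedged usage sketch for PromptAttentionUNet
import torch
from models.modules.networks import PromptAttentionUNet

net = PromptAttentionUNet(in_channels=1, out_channels=1)
x = torch.randn(1, 1, 256, 256)   # dummy single-channel IMC patch
prompt = torch.randn(1, 512)      # placeholder text embedding (e.g. from BiomedCLIPTextEncoder)
with torch.no_grad():
    y = net(x, prompt)            # output restored to the input spatial size
print(y.shape)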
test_data/1.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d97725edcd248d40aca15fc720f6caf46e55e5f2eab28fa7a28a0e8a1448dc80
size 1890089
test_data/10.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e43c1e80c83dc7898163b54338485fb092c3470326914cd697d700970ba247a
size 1935806
test_data/11.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:144aac1b1e0d4566133eeb62d65e26fe29d430f082e9fcb0b4fd1794df43a406
size 1920270
test_data/12.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8bb067a4445326aa36775a728d4d0bdf8ea622f3dc1683b4d1d14e84b31b4e98
size 1286013
test_data/13.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba481f70e969558838d17e608013cd838d858700fa628b857766ea44060cb96c
size 1858792
test_data/14.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bd5f4c6da4a8092095f9749869480281835a572b11086b82c1c1a6e230792071
size 1851990
test_data/15.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7518a1c2aa2432375794330262570d23631d9a2ebaa4ce924a9ad49df87218b1
size 1905786
test_data/16.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4d452f027d2d05088142810a0e3ab9d5692898685182b6bfd0a64ebc1d033ee
size 1894100
test_data/17.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:43d29ce1f026a7b7f19746dba61e69e6682514164d02dae2f43575eb8f779b77
size 1966934
test_data/18.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0a8cec8db943c7f24a6cb63e0d935db729f883ff7ea2fafe72859bcbc9371711
size 1894208
test_data/19.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:84795f8d8f41e27e06c4fa1fa0e1a46e753ff8359b84f6fcc334d50ce28bf144
size 1901645
test_data/2.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6327b7e06cfc10eb075d45715dfb2a1807a7899bafe6d52c5eb5422332121f51
size 1918917
test_data/20.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:66ab36131f7630743676ca7d60c4c52e518f296c17b613449b1a45a7c565bfdd
size 1834266
test_data/21.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2fbb4bad0e82ac5fe66f8e56c5a3c45eefe46c9c96274f258d81f5a8da4f196a
size 1898715
test_data/22.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:15ec9e41c7ec036284dea853035976a8957f75d2974e9821f0f59e082adce622
size 1898663
test_data/23.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:173f0e81a90fe67458947d317e60c5d5227760a30f84bd172913f29c51604bfe
size 1772117
test_data/24.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4bc4ddfe353eabc21369b1615b6b1800ace7fce3f052b15e2fe5a04e897a9cf
size 1933801
test_data/25.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0db0ddefd47d481b373aceb9aa9f6e9fdb671a256435e5e1e6cb78c1f5a650c7
size 1971978
test_data/26.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9a2b0f85da6d5c34c205a54017b18b0c9dbeea04b061f45070cbc4b1dca36e70
size 1802038
test_data/27.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e67efcfe7eec68477e088104f31732719a0f8b3ca86c92cc5e87f1ab1b465370
size 1633565
test_data/28.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:deb5dbffd27146b0c65378c82707219376c28d40de9d267acdb3f941fb8f3f87
size 1462921
test_data/29.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0f4e36c7e5203a3097929ffc987bd802ba0d4b7e2d4641a22623938bea0e4a94
size 1919319
test_data/3.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fce25bb09db8ed1d7d1537573ea86b614c095fe0398227a2cfbbaac70ac190f2
size 1987452