DIS-SAM / IS_Net /saliency_toolbox.py
jwlarocque's picture
Create DIS-SAM space
ab7d699
raw
history blame
16.4 kB
import os
import sys
from glob import glob

import cv2
import numpy as np
from joblib import Parallel, delayed
# Fixed: the scipy.ndimage.morphology namespace is deprecated (removed in
# SciPy 1.15); both functions are available directly from scipy.ndimage.
from scipy.ndimage import correlate, distance_transform_edt
from tqdm import tqdm
eps = sys.float_info.epsilon
def calcualte_once(gt_name, sm_dir, gt_threshold, beta, measures):
    """Evaluate all requested measures for a single GT/prediction pair.

    Returns a dict mapping each measure name to a (possibly empty) list of
    scores; 'Max-F' additionally fills 'Precision' and 'Recall' entries.
    If the matching prediction file is missing, the lists stay empty.
    """
    values = {m: [] for m in measures}
    if 'Max-F' in measures:
        values['Precision'] = []
        values['Recall'] = []
    # The prediction is looked up by the GT file's basename in sm_dir.
    sm_name = os.path.join(sm_dir, os.path.basename(gt_name))
    if not os.path.exists(sm_name):
        print("\n{} not found!".format(os.path.basename(sm_name)))
        print('---' * 10)
        return values
    gt, sm = read_and_normalize(gt_name, sm_name, gt_threshold)
    if 'MAE' in measures:
        values['MAE'].append(mean_square_error(gt, sm))
    if 'E-measure' in measures:
        values['E-measure'].append(e_measure(gt, sm))
    if 'S-measure' in measures:
        values['S-measure'].append(s_measure(gt, sm))
    if 'Adp-F' in measures:
        values['Adp-F'].append(adaptive_fmeasure(gt, sm, beta))
    if 'Wgt-F' in measures:
        values['Wgt-F'].append(weighted_fmeasure(gt, sm))
    if 'Max-F' in measures:
        # 256 thresholds between 0 and 1
        prec, recall = prec_recall(gt, sm, 256)
        values['Precision'].append(prec)
        values['Recall'].append(recall)
    return values
def calculate_measures(gt_dir, sm_dir, measures, save=False, beta=np.sqrt(0.3), gt_threshold=0.5, n_thread=1):
    """
    Calculate saliency measures for given directories.

    parameters
    ----------
    gt_dir : str
        The path to the ground truth directory
    sm_dir : str
        The path to the predicted saliency map directory
    measures : list
        list of measure names which need to be calculated
        supported measures: 'MAE'       => Mean Absolute Error
                            'E-measure' => Enhanced-alignment measure
                            'S-measure' => Structure-measure
                            'Max-F'     => Maximum F-measure
                            'Adp-F'     => Adaptive F-measure
                            'Wgt-F'     => Weighted F-measure
    save : str or bool
        If a directory path is given, the results are saved there as .npy files
    beta : float
        beta parameter that is used in the F-measure formula. default is sqrt(0.3)
    gt_threshold : float
        The threshold that is used to binarize ground truth maps.
    n_thread : int
        Number of parallel joblib workers.

    Returns
    -------
    values : dictionary
        a dict containing the results
    """
    values = dict()
    for idx in measures:
        values[idx] = list()
        if idx == 'Max-F':
            values['Precision'] = list()
            values['Recall'] = list()
    # Fix: glob the GT directory once instead of twice (the original called
    # glob() both for the iterator and for the tqdm total).
    gt_names = glob(os.path.join(gt_dir, '*'))
    results = Parallel(n_jobs=n_thread)(
        delayed(calcualte_once)(gt_name, sm_dir, gt_threshold, beta, measures)
        for gt_name in tqdm(gt_names, total=len(gt_names)))
    # Collect the per-image score lists returned by calcualte_once.
    for res in results:
        for key in measures:
            if key == 'Max-F':
                values['Precision'].append(res['Precision'])
                values['Recall'].append(res['Recall'])
            else:
                values[key].append(res[key])
    # Reduce the scalar measures to dataset-level means.
    for key in ('MAE', 'E-measure', 'S-measure', 'Adp-F', 'Wgt-F'):
        if key in measures:
            values[key] = np.mean(values[key])
    if 'Max-F' in measures:
        # Average precision/recall per threshold across all images, then
        # take the best F-measure over the 256 thresholds.
        values['Precision'] = np.mean(np.hstack(values['Precision'][:]), 1)
        values['Recall'] = np.mean(np.hstack(values['Recall'][:]), 1)
        f_measures = (1 + beta ** 2) * values['Precision'] * values['Recall'] / (
            beta ** 2 * values['Precision'] + values['Recall'])
        values['Fmeasure_all_thresholds'] = f_measures
        values['Max-F'] = np.max(f_measures)
    if save:
        if not os.path.isdir(save):
            os.mkdir(save)
        for key in values.keys():
            np.save(os.path.join(save, key + ".npy"), values[key])
    return values
def read_and_normalize(gt_path, sm_path, gt_threshold=0.5):
    """Read a GT map and a saliency map, normalize both to [0, 1], binarize
    the GT at gt_threshold, and resize the prediction to the GT's size.

    parameters
    ----------
    gt_path : str
        Path to a ground truth map file.
    sm_path : str
        Path to a predicted saliency map file.
    gt_threshold : float
        Threshold used to binarize the ground truth map.

    Returns
    -------
    gt_img, sm_img : numpy.ndarray
        The prepared arrays (GT binary float32, SM float in [0, 1]).
    """
    gt_img = (norm_img(cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)) >= gt_threshold).astype(np.float32)
    sm_img = norm_img(cv2.imread(sm_path, cv2.IMREAD_GRAYSCALE))
    height, width = gt_img.shape[0], gt_img.shape[1]
    if sm_img.shape[0] != height or sm_img.shape[1] != width:
        # cv2.resize takes (width, height)
        sm_img = cv2.resize(sm_img, (width, height))
    return gt_img, sm_img
def norm_img(im):
    """Min-max normalize an image to the range [0.0, 1.0] as float."""
    as_float = im.astype('float')
    return cv2.normalize(as_float, None, 0.0, 1.0, cv2.NORM_MINMAX)
# MAE
def mean_square_error(gt, sm):
    """Mean pixel-wise error between GT and saliency map.

    NOTE(review): despite the name, this computes the mean *absolute*
    error, which is what the 'MAE' metric expects; the name is kept
    because callers reference it.
    """
    return np.abs(gt - sm).mean()
# E-measure
# article: https://arxiv.org/abs/1805.10421
# original code [Matlab]: https://github.com/DengPingFan/E-measure
def e_measure(gt, sm):
    """Compute the Enhanced-alignment Measure (E-measure).

    article: https://arxiv.org/abs/1805.10421
    original code [Matlab]: https://github.com/DengPingFan/E-measure

    parameters
    ----------
    gt : numpy.ndarray
        Binary ground truth map.
    sm : numpy.ndarray
        Predicted saliency map in [0, 1] (binarized adaptively here).

    Returns
    -------
    value : float
        The calculated E-measure.
    """
    sm_bin = adptive_binary(sm).astype(np.bool_)
    gt_bin = gt.astype(np.bool_)
    dgt = gt_bin.astype(np.float32)
    dsm = sm_bin.astype(np.float32)
    if np.sum(dgt) == 0:
        # GT completely black: only the black area of the prediction counts.
        enhanced_matrix = 1.0 - dsm
    elif np.mean(dgt) == 1:
        # GT completely white: only the white area of the prediction counts.
        enhanced_matrix = dsm
    else:
        # Normal case: alignment matrix followed by the enhanced transform.
        # NOTE(review): arguments appear swapped vs alignment_term(dgt, dsm)'s
        # signature, but the formula is symmetric in its two inputs, so the
        # result is unaffected — preserved as-is.
        enhanced_matrix = enhanced_alignment_term(alignment_term(dsm, dgt))
    rows, cols = gt_bin.shape
    return np.sum(enhanced_matrix) / (rows * cols - 1 + eps)
def alignment_term(dgt, dsm):
    """Pixel-wise alignment matrix used by the E-measure.

    Both inputs are mean-centered; the formula is symmetric in the two
    arguments.
    """
    centered_sm = dsm - np.mean(dsm)
    centered_gt = dgt - np.mean(dgt)
    numerator = 2 * centered_gt * centered_sm
    denominator = centered_gt * centered_gt + centered_sm * centered_sm + eps
    return numerator / denominator
def enhanced_alignment_term(align_matrix):
    """Map alignment values from [-1, 1] to [0, 1] via ((a + 1)^2) / 4."""
    return (align_matrix + 1) ** 2 / 4
def adptive_binary(sm):
    """Binarize a saliency map at twice its mean value, clamped to 1.

    NOTE(review): "adptive" is a typo for "adaptive"; the name is kept
    because e_measure calls it.
    """
    threshold = min(2 * np.mean(sm), 1)
    return (sm >= threshold).astype(np.float32)
# S-Measure
# article: https://www.crcv.ucf.edu/papers/iccv17/1164.pdf
# Matlab code: https://github.com/DengPingFan/S-measure
def s_measure(gt, sm):
    """Compute the structural similarity (S-measure).

    article: https://www.crcv.ucf.edu/papers/iccv17/1164.pdf
    original code [Matlab]: https://github.com/DengPingFan/S-measure

    parameters
    ----------
    gt : numpy.ndarray
        Binary ground truth map.
    sm : numpy.ndarray
        Predicted saliency map in [0, 1].

    Returns
    -------
    value : float
        The calculated S-measure (clamped at 0).
    """
    gt_mean = np.mean(gt)
    if gt_mean == 0:
        # GT completely black: reward low average saliency.
        return 1.0 - np.mean(sm)
    if gt_mean == 1:
        # GT completely white: reward high average saliency.
        return np.mean(sm)
    alpha = 0.5
    # NOTE(review): arguments look swapped relative to the s_object(gt, sm)
    # and s_region(gt, sm) signatures; the calls are preserved byte-identical
    # to keep behavior — verify against the reference Matlab implementation.
    measure = alpha * s_object(sm, gt) + (1 - alpha) * s_region(sm, gt)
    return measure if measure >= 0 else 0
def ssim(gt, sm):
    """Whole-region SSIM-style similarity used by s_region.

    Uses MATLAB-style (n - 1) normalization for the variances/covariance.
    """
    gt = gt.astype(np.float32)
    rows, cols = sm.shape
    n = rows * cols
    mu_x = np.mean(sm)
    mu_y = np.mean(gt)
    # Sample variances and covariance (n - 1 normalization, eps-guarded).
    var_x = np.sum((sm - mu_x) ** 2) / (n - 1 + eps)
    var_y = np.sum((gt - mu_y) ** 2) / (n - 1 + eps)
    cov_xy = np.sum((sm - mu_x) * (gt - mu_y)) / (n - 1 + eps)
    numerator = 4 * mu_x * mu_y * cov_xy
    denominator = (mu_x ** 2 + mu_y ** 2) * (var_x + var_y)
    if numerator != 0:
        return numerator / (denominator + eps)
    if denominator == 0:
        # Both terms vanish (e.g. both maps constant): perfect similarity.
        return 1.0
    return 0
def divide_sm(sm, x, y):
    """Split a saliency map into four quadrants at column x, row y.

    Returns (left-top, right-top, left-bottom, right-bottom) views.
    """
    return sm[:y, :x], sm[:y, x:], sm[y:, :x], sm[y:, x:]
def divide_gt(gt, x, y):
    """Split a GT map at (x, y) and return the quadrants plus area weights.

    The weights are the area fraction of each quadrant; the last one is
    computed as the remainder so the four always sum to exactly 1.
    """
    height, width = gt.shape
    area = width * height
    lt, rt = gt[:y, :x], gt[:y, x:]
    lb, rb = gt[y:, :x], gt[y:, x:]
    w1 = (x * y) / area
    w2 = ((width - x) * y) / area
    w3 = (x * (height - y)) / area
    w4 = 1.0 - w1 - w2 - w3
    return lt, rt, lb, rb, w1, w2, w3, w4
def centroid(gt):
    """Return the (x, y) centroid of the GT foreground as 1-based ints.

    Falls back to the image center when the GT has no foreground.

    parameters
    ----------
    gt : numpy.ndarray
        Binary ground truth map.

    Returns
    -------
    x, y : int
        Column and row of the centroid (1-based, matching the MATLAB code).
    """
    rows, cols = gt.shape
    total = np.sum(gt)
    if total == 0:
        # Bug fix: the fallback previously returned numpy floats (np.round),
        # which are invalid as slice indices in divide_gt/divide_sm; cast to
        # int exactly like the main branch does.
        x = int(np.round(cols / 2))
        y = int(np.round(rows / 2))
    else:
        # 1-based coordinate grids, matching the original MATLAB code.
        i = np.arange(cols).reshape(1, cols) + 1
        j = np.arange(rows).reshape(rows, 1) + 1
        x = int(np.round(np.sum(np.sum(gt, 0, keepdims=True) * i) / total))
        y = int(np.round(np.sum(np.sum(gt, 1, keepdims=True) * j) / total))
    return x, y
def s_region(gt, sm):
    """Region-aware similarity: split both maps at the centroid of the
    first argument and combine per-quadrant ssim scores with area weights."""
    cx, cy = centroid(gt)
    parts = divide_gt(gt, cx, cy)
    gt_quads, weights = parts[:4], parts[4:]
    sm_quads = divide_sm(sm, cx, cy)
    scores = [ssim(s, g) for s, g in zip(sm_quads, gt_quads)]
    return sum(w * q for w, q in zip(weights, scores))
def object(gt, sm):
    """Object-level similarity score from the S-measure formulation.

    NOTE(review): this shadows the `object` builtin — the name is kept
    because s_object depends on it.
    """
    selected = sm[gt == 1]
    mean_val = np.mean(selected)
    std_val = np.std(selected)
    return 2.0 * mean_val / (mean_val ** 2 + 1.0 + std_val + eps)
def s_object(gt, sm):
    """Object-level component of the S-measure: foreground and background
    similarities blended by the GT foreground ratio.

    NOTE(review): the calls below pass (map, mask) into object(gt, sm)'s
    slots, which looks swapped relative to that signature's names; preserved
    byte-identical to keep behavior — verify against the reference code.
    """
    # Foreground similarity: zero out everything outside the GT foreground.
    fg = sm.copy()
    fg[gt == 0] = 0
    o_fg = object(fg, gt)
    # Background similarity: invert the map and zero out the foreground.
    bg = 1.0 - sm.copy()
    bg[gt == 1] = 0
    o_bg = object(bg, gt == 0)
    u = np.mean(gt)
    return u * o_fg + (1 - u) * o_bg
# Weighted F-Measure
# article: https://ieeexplore.ieee.org/document/6909433
# Matlab code: https://cgm.technion.ac.il/Computer-Graphics-Multimedia/Software/FGEval/
def weighted_fmeasure(gt, sm, beta2=1):
    """Compute the Weighted F-measure between a saliency map and the GT.

    article: https://ieeexplore.ieee.org/document/6909433
    original code [Matlab]: https://cgm.technion.ac.il/Computer-Graphics-Multimedia/Software/FGEval/

    parameters
    ----------
    gt : numpy.ndarray
        Binary ground truth map (0/1).
    sm : numpy.ndarray
        Predicted saliency map in [0, 1].
    beta2 : float
        Used directly as beta^2 in the F-measure formula (default 1).

    Returns
    -------
    value : float
        The calculated Weighted F-measure.
    """
    # Distance transform of the background: distance to (and index of) the
    # nearest foreground pixel for every background pixel.
    dst, idx = distance_transform_edt(1 - gt, return_indices=True)
    raw_idx = idx[0][gt == 0]
    col_idx = idx[1][gt == 0]
    # Pixel-wise absolute error. Fix: computed once and copied instead of
    # three identical np.abs(sm - gt) evaluations as before (same values).
    e = np.abs(sm - gt).astype(np.float32)
    # Background errors replaced by the error at the nearest foreground pixel
    # (those indexed pixels have gt == 1, so the in-place variant behaved the
    # same: fancy-index RHS is evaluated before assignment).
    et = e.copy()
    et[gt == 0] = e[raw_idx, col_idx]
    # Smooth the propagated errors with a 7x7 Gaussian, sigma = 5.
    k = matlab_style_gauss2d(shape=(7, 7), sigma=5)
    ea = correlate(et.astype(np.float32), k, mode='constant')
    # Inside the foreground, keep the smaller of the raw/smoothed error.
    min_e_ea = e.copy()
    fg_smaller = gt * (ea < e) == 1
    min_e_ea[fg_smaller] = ea[fg_smaller]
    # Background errors are down-weighted with distance from the foreground.
    b = np.ones_like(gt).astype(np.float32)
    b[gt == 0] = 2 - 1 * np.exp(np.log(1 - 0.5) / 5. * dst[gt == 0])
    ew = min_e_ea * b
    tpw = np.sum(gt) - np.sum(ew[gt == 1])  # weighted true positives
    fpw = np.sum(ew[gt == 0])               # weighted false positives
    rec = 1 - np.mean(ew[gt == 1])  # Weighted Recall
    prec = tpw / (eps + tpw + fpw)  # Weighted Precision
    value = (1 + beta2) * (rec * prec) / (eps + (beta2 * rec) + prec)
    return value
def matlab_style_gauss2d(shape=(3, 3), sigma=0.5):
"""
2D gaussian mask - should give the same result as MATLAB's
fspecial('gaussian',[shape],[sigma])
"""
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m + 1, -n:n + 1]
h = np.exp(-(x * x + y * y) / (2. * sigma * sigma))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
sumh = h.sum()
if sumh != 0:
h /= sumh
return h
# Adaptive F-measure
def adaptive_fmeasure(gt, sm, beta):
    """Compute the adaptive F-measure between a saliency map and the GT.

    The map is binarized at twice its mean value (clamped to 1), per the
    method proposed in https://ieeexplore.ieee.org/document/5206596

    parameters
    ----------
    gt : numpy.ndarray
        Binary ground truth map (0/1).
    sm : numpy.ndarray
        Predicted saliency map in [0, 1].
    beta : float
        F-measure beta parameter (squared inside the formula).

    Returns
    -------
    value : float
        The calculated adaptive F-measure; 0 when the GT has no foreground.
    """
    gt_idx = np.where(gt > 0)
    gt_cnt = np.sum(gt)
    if gt_cnt == 0:
        # Bug fix: the previous code set prec/recall to empty *lists* here
        # and then fell through to the arithmetic below, raising a TypeError.
        # With no foreground there is nothing to recall: return 0.
        return 0
    adaptive_threshold = 2 * np.mean(sm)
    if adaptive_threshold > 1:
        adaptive_threshold = 1
    sm_binary = (sm >= adaptive_threshold).astype(np.float32)
    hit_cnt = np.sum(sm_binary[gt_idx])  # true positives
    alg_cnt = np.sum(sm_binary)          # all predicted positives
    if hit_cnt == 0:
        prec = 0
        recall = 0
    else:
        prec = hit_cnt / (alg_cnt + eps)
        recall = hit_cnt / gt_cnt
    value = (1 + beta ** 2) * prec * recall / ((beta ** 2 * prec + recall) + eps)
    return value
def prec_recall(gt, sm, num_th):
    """Precision and recall over num_th evenly spaced thresholds in [0, 1].

    The results are used downstream to compute Max-F and to plot PR and
    F-threshold curves.

    parameters
    ----------
    gt : numpy.ndarray
        Binary ground truth map.
    sm : numpy.ndarray
        Predicted saliency map in [0, 1].
    num_th : int
        The total number of thresholds between 0 and 1.

    Returns
    -------
    prec, recall : numpy.ndarray
        Arrays of shape (num_th, 1); empty lists when the GT has no
        foreground.
    """
    fg_idx = np.where(gt > 0)
    fg_cnt = np.sum(gt)
    if fg_cnt == 0:
        # NOTE(review): an empty GT yields empty lists, which the caller's
        # np.hstack aggregation cannot digest — verify the dataset contains
        # no all-black GT maps.
        return [], []
    hits = np.zeros((num_th, 1), np.float32)
    preds = np.zeros((num_th, 1), np.float32)
    for k, th in enumerate(np.linspace(0, 1, num_th)):
        binary = (sm >= th).astype(np.float32)
        hits[k] = np.sum(binary[fg_idx])
        preds[k] = np.sum(binary)
    return hits / (preds + eps), hits / fg_cnt