Faster Multi-Object Segmentation using Parallel Quadratic Pseudo-Boolean Optimization, ICCV 2021 Paper
Author: Niels Jeppesen (niejep@dtu.dk)
This notebook is based on the NucleiSegmentationPart2b.ipynb notebook by Jeppesen et al. Parts of this notebook are taken directly from the NucleiSegmentationPart2b.ipynb notebook without modification.
Note: To prepare the data required in this notebook, please run the Nuclei Segmentation Part 1: Data Preparation notebook.
The goal is to benchmark three different QPBO implementations (P-QPBO, M-QPBO and K-QPBO) on a set of 2D image segmentation tasks.
To run the benchmarks, a modified version of the slgbuilder package, which includes P-QPBO and M-QPBO is required. This version of the slgbuilder package relies on the shrdr package for P-QPBO and M-QPBO C++ wrappers. Both packages are included in the supplementary material alongside this notebook.
import os
import platform
import time
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.path import Path
from scipy.ndimage import interpolation
from skimage.measure import find_contours
from sklearn import metrics
from tqdm import tqdm
from slgbuilder import GraphObject, QPBOBuilder, MQPBOBuilder, PQPBOBuilder
We start out by loading the data prepared in the previous notebook.
# Input/output locations.
comparison_dir = './nuclei_comparison_data'  # Data prepared by the Part 1 notebook.
results_dir = '../benchmark'                 # Destination for the benchmark CSV.

# Load prepared data. Each .npz archive maps image name -> array; presumably
# images, preprocessed images, ground-truth masks and nuclei center
# coordinates respectively — confirm against the Part 1 notebook.
data_dic = np.load(os.path.join(comparison_dir, 'data_dic.npz'))
data_prep_dic = np.load(os.path.join(comparison_dir, 'data_prep_dic.npz'))
mask_dic = np.load(os.path.join(comparison_dir, 'mask_dic.npz'))
center_dic = np.load(os.path.join(comparison_dir, 'center_dic.npz'))
center_approx_dic = np.load(os.path.join(comparison_dir, 'center_approx_dic.npz'))

# One benchmark entry per image name.
names = list(data_dic.keys())
We need to unfold the nuclei to use them with column-ordered graphs.
def unfold_image(img, center, max_dists=None, r_min=1, r_max=50, angles=90, steps=65, prefilter=False):
    """Radially samples an image around a given center position with a given radius and resolution."""
    # Polar sampling grid: `angles` directions, `steps` radii per direction.
    theta = np.linspace(0, 2 * np.pi, angles, endpoint=False, dtype=np.float32)
    radii = np.linspace(r_min, r_max, steps, endpoint=True, dtype=np.float32)

    # Optionally record the maximum sampling radius for the caller.
    if max_dists is not None:
        max_dists.append(np.max(radii))

    # Cartesian coordinates for every (angle, radius) combination.
    x_pos = center[0] + np.outer(np.cos(theta), radii)
    y_pos = center[1] + np.outer(np.sin(theta), radii)

    # Stack into a (steps, angles, 2) array of sampling points.
    sampling_points = np.array([x_pos, y_pos]).transpose()
    flat_points = sampling_points.reshape((-1, 2))

    # Interpolate the image at every sampling point.
    samples = interpolation.map_coordinates(img, flat_points.transpose(), mode='nearest', prefilter=prefilter)
    samples = samples.reshape(sampling_points.shape[:2])
    return samples, sampling_points
We define functions for constructing the energy function using slgbuilder, specifically the MQPBOBuilder. We also define functions for copying a builder, building and solving the defined QPBO problem. The reason we copy the builder is to save time in the build process.
def build_and_solve(helper):
    """Build the graph held by ``helper``, solve it, and time each phase.

    Returns a tuple ``(twice_energy, build_time, solve_time,
    weak_persistencies_time)`` with all times in nanoseconds. On failure the
    exception is printed and re-raised.

    Fixes over the original: the dead ``if build_time is not None`` guard is
    removed (its failure path always re-raised, so the guard could never be
    False), which also removes a latent ``UnboundLocalError`` on
    ``twice_energy``; a bare ``raise`` preserves the original traceback
    instead of ``raise ex``.
    """
    twice_energy = None
    build_time = None
    solve_time = None
    weak_persistencies_time = None
    try:
        # Phase 1: construct the underlying graph.
        start = time.time_ns()
        helper.build_graph()
        build_time = time.time_ns() - start

        # Phase 2: run the QPBO solver (weak persistencies timed separately).
        start = time.time_ns()
        twice_energy = helper.solve(compute_weak_persistencies=False)
        solve_time = time.time_ns() - start

        # Phase 3: compute weak persistencies on the solved graph.
        start = time.time_ns()
        helper.graph.compute_weak_persistencies()
        weak_persistencies_time = time.time_ns() - start
    except Exception as ex:
        print(str(ex))
        raise
    return twice_energy, build_time, solve_time, weak_persistencies_time
def print_results(stats):
    """Print one solver stats row.

    ``stats`` is expected to hold, in order: a label (builder/graph class
    name), node count, term count, build time, solve time, weak
    persistencies time, twice the energy, and a timestamp.

    Fix: the output label "Timstamp" is corrected to "Timestamp".
    """
    s = ('%s:\nNodes: %s, Terms: %s, Build: %s, Solve: %s, '
         'Weak persistencies: %s, Twice energy: %s, Timestamp: %s')
    print(s % tuple(stats))
def copy_and_solve(builder, builder_fn, cap_type=np.int32):
    """Clone the energy definition of ``builder`` into a fresh builder made
    by ``builder_fn``, then build, solve and time it.

    Returns ``(helper, stats)`` where ``stats`` is
    ``[graph class name, node count, term count, build time, solve time,
    weak persistencies time, twice energy, timestamp]``.
    """
    # Create an empty builder; the size hints are irrelevant since the
    # structure is copied below.
    helper = builder_fn(0, 0)

    # Shallow-copy every list describing the energy so the reference builder
    # is never mutated by this run.
    for attr in ('objects', 'nodes', 'unary_nodes', 'unary_e0', 'unary_e1',
                 'pairwise_from', 'pairwise_to', 'pairwise_e00',
                 'pairwise_e01', 'pairwise_e10', 'pairwise_e11'):
        setattr(helper, attr, list(getattr(builder, attr)))

    # Problem size for the stats row.
    node_count = 2 * np.sum([o.data.size for o in helper.objects])
    term_count = 2 * np.sum([l.size for l in helper.pairwise_from])

    # Build and solve the copied problem.
    twice_energy, build_time, solve_time, weak_persistencies_time = build_and_solve(helper)

    stats = [type(helper.graph).__name__, node_count, term_count,
             build_time, solve_time, weak_persistencies_time,
             twice_energy, datetime.now()]
    return helper, stats
def create_qpbo_builder(data, centers, layered_smoothness=2, exclusion_margin=.5, cap_type=np.int32):
    """Create an MQPBOBuilder for segmenting one nerve per center in ``data``.

    Each center is unfolded radially, boundary costs are taken as the radial
    gradient of the unfolded samples, and exclusion constraints are added
    between nerves whose sampling regions can overlap.

    Fixes over the original: the unused ``node_count``/``edge_count`` locals
    and the redundant ``outer_diff_sample_points`` alias are removed; the
    "never"/"nerve" comment typo is corrected.
    """
    # Lists for storing nerve objects and their maximum sampling radii.
    outer_nerves = []
    max_dists = []
    # For each center, create an outer nerve object.
    for center in centers:
        # Unfold nerve (image is negated so bright boundaries become minima).
        outer_samples, outer_sample_points = unfold_image(-data, center, r_min=0.2, r_max=80, steps=90, angles=90, max_dists=max_dists)
        # On-surface cost: radial gradient of the unfolded samples.
        outer_diff_samples = np.gradient(outer_samples, axis=0)
        if np.issubdtype(cap_type, np.integer):
            # Scale before integer truncation to preserve precision.
            outer_diff_samples *= 2**12
        outer_diff_samples = outer_diff_samples.astype(cap_type)
        outer_nerves.append(GraphObject(outer_diff_samples, outer_sample_points))

    helper = MQPBOBuilder(capacity_type=cap_type)
    helper.add_objects(outer_nerves)
    helper.add_layered_boundary_cost()
    helper.add_layered_smoothness(delta=layered_smoothness)

    # Add exclusion constraints between all pairs of outer nerves whose
    # sampling discs can overlap (squared-distance test avoids the sqrt).
    if exclusion_margin is not None:
        exclusions_dic = {}
        for i in range(len(outer_nerves)):
            ex_list = []
            for j in range(i + 1, len(outer_nerves)):
                dist = np.sum((centers[i] - centers[j])**2, axis=-1)
                if np.any(dist[~np.isnan(dist)] <= (max_dists[i] + max_dists[j])**2 + exclusion_margin):
                    ex_list.append(outer_nerves[j])
            if ex_list:
                exclusions_dic[outer_nerves[i]] = ex_list
        helper.add_layered_exclusions(exclusions_dic, margin=exclusion_margin, distance_metric='l2')

    return helper
We'll also need some helper functions for drawing the segmentation results.
def get_radial_segmentation_masks(helper, data_shape):
    """Convert each object's radial segmentation into a boolean image mask.

    Returns ``(masks_pred, (points_x, points_y))`` where ``masks_pred`` is a
    list of boolean masks of shape ``data_shape[:2]`` and ``points_x``/
    ``points_y`` hold the closed outline coordinates per object.

    Fixes over the original: the commented-out spline-smoothing block is
    removed, the unused ``enumerate`` index is dropped, and the manual
    path-code loop is replaced with direct list construction.
    """
    masks_pred = []
    points_x = []
    points_y = []
    # All pixel coordinates of the image, for point-in-polygon tests.
    indices = np.indices(data_shape[:2], dtype=np.uint16).reshape(2, -1).transpose()
    for obj in helper.objects:
        # Binary labels of the unfolded grid (assumed (radius, angle) layout
        # matching unfold_image — confirm against the builder).
        segment = helper.get_labels(obj)
        # Surface index per angle: number of "inside" samples along the
        # radial axis, clamped to the valid sample index range.
        line = np.clip(np.count_nonzero(segment == 1, axis=0), 0, len(obj.sample_points) - 1)
        # Map radial indices back to image coordinates.
        point_indices = tuple(np.asarray([line, np.arange(len(line))]))
        points = obj.sample_points[point_indices]
        # Close the outline by repeating the first point.
        points = np.append(points, points[:1], axis=0)
        points_x.append(points[..., 1])
        points_y.append(points[..., 0])
        # Closed matplotlib path: move to the first point, line segments
        # through the rest, close on the repeated final point.
        codes = [Path.MOVETO] + [Path.LINETO] * (len(points) - 2) + [Path.CLOSEPOLY]
        plot_path = Path(points, codes)
        # Rasterize the path into a boolean mask.
        point_mask = plot_path.contains_points(indices).reshape(data_shape[:2])
        masks_pred.append(point_mask)
    return masks_pred, (points_x, points_y)
def get_mask_contours(masks):
    """Return per-mask contour coordinates as ``(points_x, points_y)``."""
    # First contour of each mask at the 0.5 iso-level.
    contours = [find_contours(mask, .5)[0] for mask in masks]
    xs = [contour[..., 1] for contour in contours]
    ys = [contour[..., 0] for contour in contours]
    return xs, ys
def calculate_scores(score_function, masks_gt, masks_pred):
    """Apply ``score_function`` to each (ground-truth, prediction) mask pair
    and return the scores as an array."""
    scores = []
    for gt, pred in zip(masks_gt, masks_pred):
        # Flatten both masks so the score function sees 1-D sequences.
        scores.append(score_function(gt.flat, pred.flat))
    return np.array(scores)
def calculate_iou_scores(masks_gt, masks_pred):
    """Per-object IoU (Jaccard) scores for matched mask pairs."""
    jaccard = metrics.jaccard_score
    return calculate_scores(jaccard, masks_gt, masks_pred)
def calculate_kaggle_score(masks_gt, masks_pred):
    """Mean detection precision over IoU thresholds 0.5, 0.55, ..., 0.95
    (the 2018 Data Science Bowl metric for matched mask pairs)."""
    iou_scores = calculate_iou_scores(masks_gt, masks_pred)
    iou_thresholds = np.arange(.5, 1, .05)
    per_threshold = []
    for threshold in iou_thresholds:
        # Objects whose IoU reaches the threshold count as true positives;
        # the rest are false negatives.
        tp = np.count_nonzero(iou_scores >= threshold)
        fn = len(iou_scores) - tp
        per_threshold.append(tp / (tp + fn))
    return sum(per_threshold) / len(iou_thresholds)
def draw_segmentations(helper, data, masks_gt, centers):
    """Draw all segmentations for objects in the helper on top of the data."""
    # Predicted masks and their closed outline coordinates.
    masks_pred, (points_x_pred, points_y_pred) = get_radial_segmentation_masks(helper, data.shape)
    # Ground-truth outlines.
    points_x_gt, points_y_gt = get_mask_contours(masks_gt)
    # Scores shown in the figure.
    kaggle_score = calculate_kaggle_score(masks_gt, masks_pred)
    iou_scores = calculate_iou_scores(masks_gt, masks_pred)

    # Figure with the image as background.
    plt.figure(figsize=(20, 20))
    ax_lines = plt.subplot(1, 1, 1, title=f'Kaggle score: {round(kaggle_score, 4)}')
    ax_lines.set_xlim([0, data.shape[1]-1])
    ax_lines.set_ylim([data.shape[0]-1, 0])
    ax_lines.imshow(data, cmap='gray')

    # Solid outlines: predictions; dashed outlines: ground truth.
    # The same 8-color palette indexes both so matching pairs share a color.
    for idx, (px, py) in enumerate(zip(points_x_pred, points_y_pred)):
        ax_lines.plot(px, py, color=plt.cm.Set1(idx % 8))
    for idx, (gx, gy) in enumerate(zip(points_x_gt, points_y_gt)):
        ax_lines.plot(gx, gy, '--', color=plt.cm.Set1(idx % 8))

    # Annotate each center with its IoU percentage.
    for (cx, cy), iou in zip(centers, iou_scores):
        ax_lines.text(cy, cx, '%d' % round(iou*100), color='orange', verticalalignment='center', horizontalalignment='center', fontsize=8)
    plt.show()
def show_object_data(objects):
    """Show each object's unfolded data in a 2x8 grid of subplots."""
    fig = plt.figure(figsize=(20, 5))
    fig.suptitle(f'{len(objects)} first objects')
    # One axis-free image per object; subplot slots are 1-based.
    for slot, obj in enumerate(objects, start=1):
        axis = plt.subplot(2, 8, slot)
        axis.imshow(obj.data)
        axis.set_xticks([])
        axis.set_yticks([])
    plt.show()
We solve the QPBO problem with each of the three different implementations a number of times. For P-QPBO we benchmark the implementation for a number of different parallel thread configurations.
# --- Benchmark configuration ---
cap_type = np.int32  # Capacity type shared by all builders.
runs_per_config = 10  # Repeated solves per builder/CPU configuration.
cpu_counts = [1, 2, 4, 6, 8, 16]  # Thread counts benchmarked for P-QPBO.
parallel_builders = [PQPBOBuilder]  # Builders taking a num_threads argument.
serial_builders = [QPBOBuilder, MQPBOBuilder]  # Single-threaded builders.

# --- Result accumulators ---
pred_radial_dic = {}  # name -> per-object labels from the last solved helper.
points_radial_dic = {}  # name -> per-object sampling points.
helper_stats_list = []  # One row per solve; becomes the results dataframe.
cpu_counts_list = []  # Thread count per row; -1 marks serial builders.
helper = None  # Pre-bound so the first `del helper` in the loop succeeds.

pbar = tqdm(names)
for n, name in enumerate(pbar):
    pbar.set_description(f"Processing '{name}'")
    previous_energy = -1  # Sentinel: no energy observed yet for this image.
    # Create reference helper. Each run below copies its graph definition
    # (via copy_and_solve) instead of rebuilding it from the image.
    reference_helper = create_qpbo_builder(data_prep_dic[name], center_approx_dic[name], cap_type=cap_type)
    # Parallel builders: benchmark every thread-count configuration.
    for builder in parallel_builders:
        for cpu_count in cpu_counts:
            for i in range(runs_per_config):
                desc = f'{name} - {builder.__name__} - cpus: {cpu_count}, run: {i + 1}/{runs_per_config}'
                pbar.set_description(f"Processing '{desc}'")
                # Create builder function.
                # NOTE: the lambda late-binds `builder`/`cpu_count`; safe here
                # because it is invoked before either variable changes.
                builder_fn = lambda estimated_nodes, estimated_terms:builder(estimated_nodes, estimated_terms, capacity_type=cap_type, num_threads=cpu_count)
                # Clear memory.
                del helper
                # Build and solve.
                helper, helper_stats = copy_and_solve(reference_helper, builder_fn, cap_type)
                energy = helper_stats[-2]  # TwiceEnergy entry of the stats row.
                helper_stats.insert(0, name)
                helper_stats.insert(1, len(center_approx_dic[name]))
                # Save stats.
                helper_stats_list.append(helper_stats)
                cpu_counts_list.append(cpu_count)
                # All implementations/configurations must find the same energy.
                if previous_energy != -1:
                    assert energy == previous_energy, f'{energy} != {previous_energy}'
                previous_energy = energy
    # Serial builders: same benchmark without a thread-count sweep.
    for builder in serial_builders:
        for i in range(runs_per_config):
            desc = f'{name} - {builder.__name__} - run: {i + 1}/{runs_per_config}'
            pbar.set_description(f"Processing '{desc}'")
            # Create builder function.
            builder_fn = lambda estimated_nodes, estimated_terms:builder(estimated_nodes, estimated_terms, capacity_type=cap_type)
            # Clear memory.
            del helper
            # Build and solve.
            helper, helper_stats = copy_and_solve(reference_helper, builder_fn, cap_type)
            energy = helper_stats[-2]
            helper_stats.insert(0, name)
            helper_stats.insert(1, len(center_approx_dic[name]))
            # Save stats.
            helper_stats_list.append(helper_stats)
            cpu_counts_list.append(-1)  # -1 marks a serial (non-parallel) run.
            if previous_energy != -1:
                assert energy == previous_energy, f'{energy} != {previous_energy}'
            previous_energy = energy
    # Save predictions for one of them.
    pred_radial_dic[name] = [helper.get_labels(o) for o in helper.objects]
    points_radial_dic[name] = [o.sample_points for o in helper.objects]
    # Draw every nth result.
    if n % 50 == 0:
        # Show first 16 objects as images.
        show_object_data(helper.objects[:16])
        # Draw results.
        draw_segmentations(helper, data_dic[name], mask_dic[name], center_approx_dic[name])
        # Print segmentation time and graph stats.
        print_results(helper_stats[2:])
# Clear memory.
# NOTE(review): placed after the loop — were this `del` inside the loop body,
# the `del helper` at the top of the next iteration would raise NameError.
# Source indentation was lost in flattening; confirm against the notebook.
del helper
We add system information to the results and save them as CSV.
# Timestamp used to make the output file names unique.
timestr = time.strftime("%Y%m%d-%H%M%S")

# Create and save dataframe: one row per benchmark run.
df = pd.DataFrame(data=helper_stats_list, columns=['Name', 'NucleiCount', 'Class', 'NodesCount', 'TermCount', 'BuildTime', 'SolveTime', 'WeakPersistenciesTime', 'TwiceEnergy', 'Timestamp'])
# Convert nanosecond timings (from time.time_ns) to seconds.
df['BuildTime'] = df['BuildTime'] / 10**9
df['SolveTime'] = df['SolveTime'] / 10**9
df['WeakPersistenciesTime'] = df['WeakPersistenciesTime'] / 10**9
df['CpuCount'] = np.array(cpu_counts_list, dtype=np.int16)
# Short label per run, e.g. 'PQPB (8)'; serial runs (CpuCount == -1) get no suffix.
df['ShortName'] = df['Class'].str[:4]
# FIX: np.str (a deprecated alias for the builtin str) was removed in
# NumPy 1.24 — use the builtin directly.
df.loc[df['CpuCount'] != -1, 'ShortName'] += ' (' + df['CpuCount'].astype(str) + ')'
# System information so results from different machines can be compared.
df['SystemName'] = platform.node()
try:
    import cpuinfo  # Optional third-party dependency (py-cpuinfo).
    info = cpuinfo.get_cpu_info()
    df['SystemCpu'] = info['brand_raw']
except Exception:
    # FIX: narrowed from a bare `except:`; an ImportError (package missing)
    # or KeyError (missing field) falls back to the platform module.
    df['SystemCpu'] = platform.processor()
df['SystemCpuCount'] = np.array(os.cpu_count(), np.int16)
df_full = df
df_full.to_csv(os.path.join(results_dir, f'parallel_qpbo_nuclei_benchmark_results_{timestr}.csv'))

# Save masks.
np.savez_compressed(os.path.join(comparison_dir, f'pred_radial_dic_{timestr}.npz'), **pred_radial_dic)
np.savez_compressed(os.path.join(comparison_dir, f'points_radial_dic_{timestr}.npz'), **points_radial_dic)

# Display dataframe.
df_full
df_full.groupby(['Name', 'NucleiCount', 'Class', 'CpuCount'])['SolveTime'].describe()
df_full.groupby(['NucleiCount', 'Class', 'CpuCount'])['SolveTime'].describe()