Faster Multi-Object Segmentation using Parallel Quadratic Pseudo-Boolean Optimization, ICCV 2021 Paper
Authors: Niels Jeppesen (niejep@dtu.dk) and Patrick M. Jensen (patmjen@dtu.dk)
This notebook is used to analyze the benchmark results from the ParallelNerveSegmentation3DPart1.ipynb notebook. The benchmark tests the performance of three different QPBO implementations: K-QPBO, M-QPBO and P-QPBO. The K-QPBO implementation is found in the thinqpbo package and is almost identical to the original implementation by Vladimir Kolmogorov. P-QPBO is our new parallel QPBO implementation and M-QPBO is our serial QPBO implementation.
import os
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.optimize
First we load the benchmark results from the CSV files. Once we've loaded the results we display the dataframe. Change the variables in the cell below to save figures or change directories.
# Should figures be saved?
save_figures = False
# Directory to save figures.
figure_dir = 'figures'
# Directory with benchmark results.
benchmark_dir = '../benchmark/nerve_benchmarks/'
# glob returns paths in arbitrary order; sort them so the row order of the
# combined dataframe (and everything derived from it) is reproducible.
benchmark_paths = sorted(glob(os.path.join(benchmark_dir, 'parallel_qpbo_benchmark_results_*.csv')))
benchmark_paths
# Fail with a clear message instead of an IndexError when nothing matches.
if not benchmark_paths:
    raise FileNotFoundError(f'No benchmark result files found in {benchmark_dir!r}.')
# DataFrame.append was removed in pandas 2.0, so the per-file frames are
# combined with pd.concat instead.
frames = [pd.read_csv(path, index_col=0) for path in benchmark_paths]
# A single file keeps the index read from the CSV (as before); several files
# are concatenated with a fresh running index.
df_all = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)
df_all
To get an overview of the data we've loaded, we print the different configurations.
# Print the distinct values of each column that describes a benchmark
# configuration, one heading per column followed by its tab-indented values.
for heading, col in (
    ('Classes:', 'Class'),
    ('Nerve counts:', 'NerveCount'),
    ('Node counts:', 'NodeCount'),
    ('Edge counts:', 'EdgeCount'),
    ('CPU counts:', 'SystemCpuCount'),
):
    print(heading)
    for n in df_all[col].unique().tolist():
        print(f'\t{n}')
For the purpose of plotting, we update the ShortName column values.
# Work on a copy so the raw benchmark results in df_all stay untouched.
df = df_all.copy()
# Map the implementation class names to the short names used in the plots.
df.loc[df['Class'] == 'QPBOInt', 'ShortName'] = 'K-QPBO'
df.loc[df['Class'].str.startswith('QpboCap'), 'ShortName'] = 'M-QPBO'
df.loc[df['Class'].str.startswith('ParallelQpboCap'), 'ShortName'] = 'P-QPBO'
# Append the thread count, e.g. 'P-QPBO (8)', for rows whose CpuCount is not -1.
# The builtin str is used because the np.str alias was removed in NumPy 1.24.
df.loc[df['CpuCount'] != -1, 'ShortName'] += ' (' + df['CpuCount'].astype(str) + ')'
df['SystemCpuCount'] = df['SystemCpuCount'].astype(np.int16)
# Total time is the sum of the build, solve and weak-persistencies timings.
df['TotalTime'] = df['BuildTime'] + df['SolveTime'] + df['WeakPersistenciesTime']
We only want to work with results from a specific system and configuration, so we filter out all other results. This should have no effect for the data included in the supplementary material.
# Keep only rows with 216 nerves, measured on the 'Gold 6226R' CPU, whose
# class name either contains no 'CapInt' at all or contains 'CapInt32'.
mask = (
    (df['NerveCount'] == 216)
    & df['SystemCpu'].str.contains('Gold 6226R')
    & (~df['Class'].str.contains('CapInt') | df['Class'].str.contains('CapInt32'))
)
df = df.loc[mask]
We can now group the data to get an overview of the solve times for N1 and N2 for each algorithm and thread configuration. The results are used in the paper, where we report the minimum (best) solve time for each group.
# Group the runs by node/edge count, implementation class, system CPU and
# thread count.
df_group = df.groupby(
    by=['NodeCount', 'EdgeCount', 'Class', 'SystemCpu', 'CpuCount'],
)
# Summary statistics of the solve time within each group.
df_group[['SolveTime']].describe()
To plot the results, we define a plot function.
def time_plot(
    df,
    column,
    label,
    title=None,
    filename=None,
    relative_to=None,
    speedup=False,
    per_thread=False,
    return_ax=False,
    ax=None,
    plot_label=None,
    amdahl_fit=False,
):
    """Plot the best P-QPBO value of ``column`` per thread count.

    Used for plotting results of the N1 and N2 segmentation tasks. Rows are
    split by their ``Class`` prefix: ``QPBO*`` (K-QPBO) and ``QpboCap*``
    (M-QPBO) are drawn as horizontal baselines, ``ParallelQpboCap*`` (P-QPBO)
    is drawn as a curve over ``CpuCount``.

    Args:
        df: Benchmark results with at least the columns ``Class``,
            ``ShortName``, ``SystemCpu``, ``CpuCount`` and ``column``.
            The dataframe is copied and never modified.
        column: Name of the column holding the timings to plot.
        label: Y-axis label.
        title: Axes title. It is set after the figure has been saved, so it
            only shows up in the notebook.
        filename: File name used when the module-level ``save_figures`` flag
            is true; the figure is written into ``figure_dir``.
        relative_to: ``ShortName`` of the reference configuration. When given,
            all values are divided by the reference's minimum value.
        speedup: Only used together with ``relative_to``. Inverts the relative
            values so they express speed-up; the best value per group is then
            the maximum instead of the minimum.
        per_thread: Only used together with ``speedup``. Divides the speed-up
            of rows with more than one thread by the thread count, normalizes
            the curve by its maximum and hides the serial baselines.
        return_ax: If true, return the axes right after the curve is drawn,
            without decorating, saving or showing the figure.
        ax: Existing axes to draw into. A new figure is created when omitted.
        plot_label: Legend label of the curve (default ``'P-QPBO'``).
        amdahl_fit: For speed-up plots that are not per-thread, also fit and
            draw an Amdahl's law curve.

    Returns:
        The axes if ``return_ax`` is true, otherwise ``None``.

    Raises:
        ValueError: If ``relative_to`` is given but no row has that
            ``ShortName``.
    """
    # Copy dataframe to avoid changing original.
    df = df.copy()

    relative_speedup = False
    efficiency = False
    if relative_to is not None:
        # Plot the data relative to a specific configuration (e.g. K-QPBO).
        reference = df.loc[df['ShortName'] == relative_to, column]
        if reference.empty:
            raise ValueError(f'No rows with ShortName {relative_to!r} to use as reference.')
        df[column] /= reference.min()
        if speedup:
            # Speed-up is the inverse of the relative time, so the best run
            # is now the one with the largest value.
            relative_speedup = True
            df[column] = 1 / df[column]
            if per_thread:
                # Speed-up per thread (efficiency).
                efficiency = True
                multi_threaded = df['CpuCount'] > 1
                df.loc[multi_threaded, column] /= df.loc[multi_threaded, 'CpuCount']

    def best(values):
        """Return the best value: max for speed-ups, min for times."""
        return values.max() if relative_speedup else values.min()

    # Separate data.
    df_qpbo = df[df['Class'].str.startswith('QPBO')]
    df_mqpbo = df[df['Class'].str.startswith('QpboCap')]
    df = df[df['Class'].str.startswith('ParallelQpboCap')]

    # Create new plot or re-use. A different marker distinguishes the curve
    # added to an existing plot.
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(5, 2.5))
        style = '.-'
    else:
        style = '*-'

    color = None
    if not efficiency:
        # Draw the serial implementations as horizontal baselines, using the
        # same best-value rule as the P-QPBO curve.
        baselines = (
            (df_qpbo, plt.cm.Set1(0), '--'),
            (df_mqpbo, plt.cm.Set1(1), ':'),
        )
        for baseline_df, baseline_color, line_style in baselines:
            if baseline_df.empty:
                continue
            ax.axhline(
                best(baseline_df[column]),
                color=baseline_color,
                ls=line_style,
                label=baseline_df['ShortName'].iloc[0],
            )
        color = plt.cm.Set1(2)

    # Best value per implementation, system and thread count.
    grouped = df.groupby(['Class', 'SystemCpu', 'CpuCount'], sort=False)[column]
    agg = best(grouped).reset_index()

    if efficiency:
        # Normalize so that the most efficient configuration has efficiency 1.
        agg[column] /= agg[column].max()
        ax.set_ylim(0, 1)
        ax.set_yticks(np.arange(0, 1.1, .1))

    # Plot data.
    agg.plot(
        ax=ax,
        x='CpuCount',
        y=column,
        style=style,
        color=color,
        label='P-QPBO' if plot_label is None else plot_label,
    )
    if return_ax:
        return ax

    if relative_speedup and not efficiency:
        if amdahl_fit:
            threads = agg['CpuCount'].to_numpy(dtype=float)
            speedups = agg[column].to_numpy(dtype=float)
            # The fit is relative to the run with the fewest threads
            # (assumed to be the single-threaded run).
            base = speedups[np.argmin(threads)]
            fit = scipy.optimize.least_squares(
                lambda p: speedups / base - 1 / (1 - p[0] + p[0] / threads),
                0,
            )
            amdahl_p = fit.x[0]
            ax.plot(
                threads,
                base / (1 - amdahl_p + amdahl_p / threads),
                '--',
                color='gray',
                label=f"Amdahl's law fit (p={amdahl_p:3.2f})",
            )
        # Round the upper limit to an integer and add a minor tick per unit.
        ymax = int(ax.get_ylim()[1] + 0.5)
        ax.set_ylim(0, ymax)
        ax.set_yticks(np.arange(0, ymax + 1, 1), minor=True)

    ax.set_xlim(df['CpuCount'].min() - 1, df['CpuCount'].max() + 1)
    ax.legend()
    ax.grid(axis='y')
    ax.set_ylabel(label)
    ax.set_xticks(df['CpuCount'])
    ax.set_xlabel('Number of threads')

    # Operate on the figure that owns the axes rather than on whatever
    # figure happens to be current.
    figure = ax.figure
    figure.tight_layout()
    if save_figures and filename is not None:
        os.makedirs(figure_dir, exist_ok=True)
        figure.savefig(os.path.join(figure_dir, filename))
    # The title is added after saving, so it is not part of the saved file.
    ax.set_title(title)
    plt.show()
Let's plot the results for the N1 task. The third plot is used in the paper.
# Select the runs of the smallest task (N1), i.e. those with the lowest node count.
mask = df['NodeCount'].eq(df['NodeCount'].min())
df_n1 = df.loc[mask]
# Suffix inserted into the figure file names.
fig_append = '_n1'
# Title for the plots (only used in notebook).
title = f"{df_n1['SystemCpu'].iloc[0].split('@')[0].strip()} ({df_n1['SystemCpuCount'].iloc[0]} CPUs), 32-bit capacities"
# Absolute solve time.
time_plot(
    df_n1,
    column='SolveTime',
    label='Solve time (s)',
    title=title,
    filename=f'plot_qpbo_solve_cap32_xeon_gold_6226r{fig_append}.pdf',
)
# Solve time relative to K-QPBO.
time_plot(
    df_n1,
    column='SolveTime',
    label='Solve time (relative)',
    title=title,
    filename=f'plot_qpbo_solve_cap32_xeon_gold_6226r_rel{fig_append}.pdf',
    relative_to='K-QPBO',
)
# Speed-up relative to K-QPBO, with an Amdahl's law fit.
time_plot(
    df_n1,
    column='SolveTime',
    label='Solve time speed-up (times)',
    title=title,
    filename=f'plot_qpbo_solve_cap32_xeon_gold_6226r_relx{fig_append}_amdahl1.pdf',
    relative_to='K-QPBO',
    speedup=True,
    amdahl_fit=True,
)
And then the results for the N2 task. The third plot is used in the paper.
# Select the runs of the largest task (N2), i.e. those with the highest node count.
mask = df['NodeCount'].eq(df['NodeCount'].max())
df_n2 = df.loc[mask]
# Suffix inserted into the figure file names.
fig_append = '_n2'
# Title for the plots (only used in notebook).
title = f"{df_n2['SystemCpu'].iloc[0].split('@')[0].strip()} ({df_n2['SystemCpuCount'].iloc[0]} CPUs), 32-bit capacities"
# Absolute solve time.
time_plot(
    df_n2,
    column='SolveTime',
    label='Solve time (s)',
    title=title,
    filename=f'plot_qpbo_solve_cap32_xeon_gold_6226r{fig_append}.pdf',
)
# Solve time relative to K-QPBO.
time_plot(
    df_n2,
    column='SolveTime',
    label='Solve time (relative)',
    title=title,
    filename=f'plot_qpbo_solve_cap32_xeon_gold_6226r_rel{fig_append}.pdf',
    relative_to='K-QPBO',
)
# Speed-up relative to K-QPBO, with an Amdahl's law fit.
time_plot(
    df_n2,
    column='SolveTime',
    label='Solve time speed-up (times)',
    title=title,
    filename=f'plot_qpbo_solve_cap32_xeon_gold_6226r_relx{fig_append}_amdahl1.pdf',
    relative_to='K-QPBO',
    speedup=True,
    amdahl_fit=True,
)
We can also compare the per-thread efficiency for each task. The baseline for each task is the time it takes for P-QPBO(1). If P-QPBO(20) is 20 times faster than P-QPBO(1), it has an efficiency of one. If it is 10 times faster, the efficiency is 0.5, and so on.
# Create title (only used in notebook).
title = f"{df['SystemCpu'].iloc[0].split('@')[0].strip()} ({df['SystemCpuCount'].iloc[0]} CPUs), 32-bit capacities"
# Create efficiency plot for both tasks. The first call only draws the N1
# curve and hands back its axes; the second call adds the N2 curve to the same
# axes and then decorates, saves and shows the figure.
ax = time_plot(
    df_n1,
    'SolveTime',
    'Thread efficiency (relative)',
    relative_to='K-QPBO',
    speedup=True,
    per_thread=True,
    return_ax=True,
    plot_label='P-QPBO(N1)',
)
time_plot(
    df_n2,
    'SolveTime',
    'Thread efficiency (relative)',
    title=title,
    filename='plot_qpbo_solve_cap32_xeon_gold_6226r_relxt.pdf',
    relative_to='K-QPBO',
    speedup=True,
    per_thread=True,
    ax=ax,
    plot_label='P-QPBO(N2)',
)
Get speed-up numbers to use in text and tables.
# Best (minimum) N1 solve time per implementation class, system and thread count.
df_group = df_n1.groupby(by=['Class', 'SystemCpu', 'CpuCount'], sort=False)
df = df_group['SolveTime'].min().reset_index()
# Speed-up and relative time reduction compared to K-QPBO (class 'QPBOInt').
ref_val = df.loc[df['Class'] == 'QPBOInt', 'SolveTime'].iloc[0]
df['SpeedUp'] = df['SolveTime'].rdiv(ref_val)
df['ReductionK'] = (ref_val - df['SolveTime']).div(ref_val)
# Relative time reduction compared to the M-QPBO class used for this task.
ref_val = df.loc[df['Class'] == 'QpboCapInt32ArcIdxUInt32NodeIdxUInt32', 'SolveTime'].iloc[0]
df['ReductionM'] = (ref_val - df['SolveTime']).div(ref_val)
# Show the table with the largest speed-up highlighted.
df.style.highlight_max(subset=['SpeedUp'], color='yellow')
# Best (minimum) N2 solve time per implementation class, system and thread count.
df_group = df_n2.groupby(by=['Class', 'SystemCpu', 'CpuCount'], sort=False)
df = df_group['SolveTime'].min().reset_index()
# Speed-up and relative time reduction compared to K-QPBO (class 'QPBOInt').
ref_val = df.loc[df['Class'] == 'QPBOInt', 'SolveTime'].iloc[0]
df['SpeedUp'] = df['SolveTime'].rdiv(ref_val)
df['ReductionK'] = (ref_val - df['SolveTime']).div(ref_val)
# Relative time reduction compared to the M-QPBO class used for this task.
ref_val = df.loc[df['Class'] == 'QpboCapInt32ArcIdxUInt64NodeIdxUInt32', 'SolveTime'].iloc[0]
df['ReductionM'] = (ref_val - df['SolveTime']).div(ref_val)
# Show the table with the largest speed-up highlighted.
df.style.highlight_max(subset=['SpeedUp'], color='yellow')