import numpy as np
def nms_fast(boxes, scores, iou_thr, score_thr=0.0):
    """
    Perform fast Non-Maximum Suppression (NMS) on a list of bounding boxes.

    Boxes scoring below ``score_thr`` are discarded first. The remaining
    boxes are visited from highest to lowest score; each visited box is kept
    and every remaining box whose IoU with it is ``>= iou_thr`` is dropped.

    Args:
        boxes (np.ndarray): An array of bounding boxes in the format
            [x1, y1, x2, y2], one box per row.
        scores (np.ndarray): Array of scores for each box.
        iou_thr (float): The IOU threshold to determine if boxes should be
            suppressed.
        score_thr (float): Score threshold for suppressing low-scoring boxes.

    Returns:
        List[int]: Indices of the kept boxes, expressed relative to the
        ``boxes``/``scores`` arrays passed in, ordered by descending score.
    """
    boxes = np.asarray(boxes)
    scores = np.asarray(scores)

    # Positions (in the caller's arrays) of the boxes that pass the score
    # threshold; kept so the result can be mapped back to original indices.
    valid_boxes = np.where(scores >= score_thr)[0]
    if len(valid_boxes) == 0:
        return []
    boxes = boxes[valid_boxes]
    scores = scores[valid_boxes]

    # Integer coordinates are promoted to float. Unsigned integers in
    # particular would wrap around in `xx2 - xx1` for disjoint boxes instead
    # of producing a negative value that the clamp below turns into 0.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1) * (y2 - y1)
    eps = 1e-16  # avoids 0/0 when two zero-area boxes are compared

    # Indices (into the filtered arrays) of the boxes that are kept
    pick = []
    # Candidates sorted by score, best first
    idxs = np.argsort(scores)[::-1]
    while len(idxs) > 0:
        i = idxs[0]
        rest = idxs[1:]
        pick.append(i)

        # Intersection rectangle of box i with every remaining candidate
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0, xx2 - xx1)
        h = np.maximum(0, yy2 - yy1)

        inter_area = w * h
        union_area = area[i] + area[rest] - inter_area + eps
        iou = inter_area / union_area

        # Keep only candidates whose IOU with box i is below the threshold
        idxs = rest[iou < iou_thr]

    # `pick` indexes the score-filtered arrays; translate back so the result
    # refers to the arrays the caller passed in.
    return valid_boxes[pick].tolist()
def benchmark_nms_methods(box_counts, n_runs=10):
    """
    Time four NMS implementations on randomly generated boxes.

    For every entry of ``box_counts`` a fresh random set of boxes and scores
    is generated, and each implementation (powerboxes ``nms``, powerboxes
    ``rtree_nms``, the NumPy ``nms_fast`` from this file, and OpenCV
    ``cv2.dnn.NMSBoxes``) is run ``n_runs`` times on it.

    Args:
        box_counts: Iterable of box counts to benchmark.
        n_runs (int): Number of repetitions averaged per measurement.

    Returns:
        tuple: ``(pb_nms_times, pb_rtree_nms_times, numpy_nms_times,
        opencv_nms_times)``; each is a list holding the average time per run
        in seconds, one entry per element of ``box_counts``.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1.
    """
    from time import perf_counter
    import powerboxes as pb
    import cv2

    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    def average_runtime(func, *args, **kwargs):
        # Average wall-clock seconds of one func(*args, **kwargs) call.
        start = perf_counter()
        for _ in range(n_runs):
            func(*args, **kwargs)
        return (perf_counter() - start) / n_runs

    pb_nms_times = []
    pb_rtree_nms_times = []
    numpy_nms_times = []
    opencv_nms_times = []
    image_size = 10_000
    iou_thr = 0.5
    for n_predictions in box_counts:
        topleft = np.random.randint(0, image_size, size=(n_predictions, 2))
        width_height = np.random.randint(0,
                                         image_size,
                                         size=(n_predictions, 2))
        # Corner format [x1, y1, x2, y2] for powerboxes and the NumPy version
        boxes = np.concatenate([topleft, topleft + width_height], axis=1)
        # cv2.dnn.NMSBoxes takes rectangles as (x, y, w, h); passing corner
        # coordinates would make OpenCV work on different (larger) boxes.
        boxes_xywh = np.concatenate([topleft, width_height], axis=1)
        scores = np.random.uniform(0, 1, size=(n_predictions, ))

        # PowerBoxes NMS
        pb_nms_times.append(
            average_runtime(pb.nms, boxes, scores, iou_thr,
                            score_threshold=0.0))
        # PowerBoxes RTree NMS
        pb_rtree_nms_times.append(
            average_runtime(pb.rtree_nms, boxes, scores, iou_thr,
                            score_threshold=0.0))
        # Numpy NMS
        numpy_nms_times.append(
            average_runtime(nms_fast, boxes, scores, iou_thr))
        # OpenCV NMS; the array-to-list conversion is deliberately kept
        # inside the timed call, as in the original measurement.
        opencv_nms_times.append(
            average_runtime(lambda: cv2.dnn.NMSBoxes(
                boxes_xywh.tolist(), scores.tolist(), 0.0, iou_thr)))
    return pb_nms_times, pb_rtree_nms_times, numpy_nms_times, opencv_nms_times
if __name__ == '__main__':
    # Script entry point: run the benchmark and draw a grouped bar chart
    # (one group per box count, one bar per NMS implementation).
    from matplotlib import pyplot as plt

    box_counts = [10, 100, 1_000, 10_000, 30_000, 50_000]
    pb_nms_times, pb_rtree_nms_times, numpy_nms_times, opencv_nms_times = benchmark_nms_methods(
        box_counts)

    # (legend label, bar color, measured times), in left-to-right bar order
    series = [
        ('PowerBoxes NMS', 'blue', pb_nms_times),
        ('PowerBoxes RTree NMS', 'red', pb_rtree_nms_times),
        ('Numpy NMS', 'green', numpy_nms_times),
        ('OpenCV NMS', 'orange', opencv_nms_times),
    ]

    # Width of the bars
    bar_width = 0.2
    # Left-most bar position of every group
    group_positions = np.arange(len(box_counts))

    # Creating the bar chart
    plt.figure(figsize=(14, 8))
    for slot, (label, color, times) in enumerate(series):
        plt.bar(group_positions + slot * bar_width,
                times,
                color=color,
                width=bar_width,
                edgecolor='grey',
                label=label)

    # Adding labels
    plt.xlabel('Number of Boxes', fontweight='bold')
    plt.ylabel('Average Time per Run (seconds)', fontweight='bold')
    # Put each tick under the middle of its group: bar centres run from
    # 0 to (len(series) - 1) * bar_width, so the midpoint is half of that.
    group_center_offset = (len(series) - 1) * bar_width / 2
    plt.xticks(group_positions + group_center_offset, box_counts)
    plt.yscale('log')

    # Creating a legend and showing the plot
    plt.title('NMS Methods Performance Comparison')
    plt.legend()
    plt.savefig('nms_benchmark.png')
    plt.show()
For 100 runs on an 11th-gen Intel Core i7 (2.8 GHz) with 4 cores and 8 logical processors, the results are shown below. For smaller numbers of boxes, powerboxes is indeed faster than the alternatives.