Skip to content

Commit 1d73468

Browse files
authored
Merge pull request #2012 from jackiehimel/fix/bugs-1959-2000
fix: correct numpy indexing in denormalize_boxes and add ultralytics validation
2 parents 9320be5 + dfd2a60 commit 1d73468

File tree

4 files changed

+162
-63
lines changed

4 files changed

+162
-63
lines changed

supervision/detection/core.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -296,18 +296,24 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
296296
class_id=np.arange(len(ultralytics_results)),
297297
)
298298

299-
class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
300-
class_names = np.array([ultralytics_results.names[i] for i in class_id])
301-
return cls(
302-
xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
303-
confidence=ultralytics_results.boxes.conf.cpu().numpy(),
304-
class_id=class_id,
305-
mask=extract_ultralytics_masks(ultralytics_results),
306-
tracker_id=ultralytics_results.boxes.id.int().cpu().numpy()
307-
if ultralytics_results.boxes.id is not None
308-
else None,
309-
data={CLASS_NAME_DATA_FIELD: class_names},
310-
)
299+
if (
300+
hasattr(ultralytics_results, "boxes")
301+
and ultralytics_results.boxes is not None
302+
):
303+
class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
304+
class_names = np.array([ultralytics_results.names[i] for i in class_id])
305+
return cls(
306+
xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
307+
confidence=ultralytics_results.boxes.conf.cpu().numpy(),
308+
class_id=class_id,
309+
mask=extract_ultralytics_masks(ultralytics_results),
310+
tracker_id=ultralytics_results.boxes.id.int().cpu().numpy()
311+
if ultralytics_results.boxes.id is not None
312+
else None,
313+
data={CLASS_NAME_DATA_FIELD: class_names},
314+
)
315+
316+
return cls.empty()
311317

312318
@classmethod
313319
def from_yolo_nas(cls, yolo_nas_results) -> Detections:

supervision/detection/utils/boxes.py

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -95,60 +95,67 @@ def pad_boxes(xyxy: np.ndarray, px: int, py: int | None = None) -> np.ndarray:
9595

9696

9797
def denormalize_boxes(
98-
normalized_xyxy: np.ndarray,
98+
xyxy: np.ndarray,
9999
resolution_wh: tuple[int, int],
100100
normalization_factor: float = 1.0,
101101
) -> np.ndarray:
102102
"""
103-
Converts normalized bounding box coordinates to absolute pixel values.
103+
Convert normalized bounding box coordinates to absolute pixel coordinates.
104+
105+
Multiplies x coordinates by image width and y coordinates by image height, then divides by
106+
`normalization_factor`, mapping values from normalized `[0, normalization_factor]`
107+
to absolute pixel values for a given resolution.
104108
105109
Args:
106-
normalized_xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each row
107-
contains normalized coordinates in the format `(x_min, y_min, x_max, y_max)`,
108-
with values between 0 and `normalization_factor`.
109-
resolution_wh (Tuple[int, int]): A tuple `(width, height)` representing the
110-
target image resolution.
111-
normalization_factor (float, optional): The normalization range of the input
112-
coordinates. Defaults to 1.0.
110+
xyxy (`numpy.ndarray`): Normalized bounding boxes of shape `(N, 4)`,
111+
where each row is `(x_min, y_min, x_max, y_max)`, values in
112+
`[0, normalization_factor]`.
113+
resolution_wh (`tuple[int, int]`): Target image resolution as `(width, height)`.
114+
normalization_factor (`float`): Maximum value of input coordinate range.
115+
Defaults to `1.0`.
113116
114117
Returns:
115-
np.ndarray: An array of shape `(N, 4)` with absolute coordinates in
118+
(`numpy.ndarray`): Array of shape `(N, 4)` with absolute coordinates in
116119
`(x_min, y_min, x_max, y_max)` format.
117120
118121
Examples:
119122
```python
120123
import numpy as np
121124
import supervision as sv
122125
123-
# Default normalization (0-1)
124-
normalized_xyxy = np.array([
126+
xyxy = np.array([
125127
[0.1, 0.2, 0.5, 0.6],
126-
[0.3, 0.4, 0.7, 0.8]
128+
[0.3, 0.4, 0.7, 0.8],
129+
[0.2, 0.1, 0.6, 0.5]
127130
])
128-
resolution_wh = (100, 200)
129-
sv.denormalize_boxes(normalized_xyxy, resolution_wh)
131+
132+
sv.denormalize_boxes(xyxy, (1280, 720))
130133
# array([
131-
# [ 10., 40., 50., 120.],
132-
# [ 30., 80., 70., 160.]
134+
# [128., 144., 640., 432.],
135+
# [384., 288., 896., 576.],
136+
# [256., 72., 768., 360.]
133137
# ])
138+
```
134139
135-
# Custom normalization (0-100)
136-
normalized_xyxy = np.array([
137-
[10., 20., 50., 60.],
138-
[30., 40., 70., 80.]
140+
```python
141+
import numpy as np
142+
import supervision as sv
143+
144+
xyxy = np.array([
145+
[256., 128., 768., 640.]
139146
])
140-
sv.denormalize_boxes(normalized_xyxy, resolution_wh, normalization_factor=100.0)
147+
148+
sv.denormalize_boxes(xyxy, (1280, 720), normalization_factor=1024.0)
141149
# array([
142-
# [ 10., 40., 50., 120.],
143-
# [ 30., 80., 70., 160.]
150+
# [320., 90., 960., 450.]
144151
# ])
145152
```
146-
""" # noqa E501 // docs
153+
"""
147154
width, height = resolution_wh
148-
result = normalized_xyxy.copy()
155+
result = xyxy.copy()
149156

150-
result[[0, 2]] = (result[[0, 2]] * width) / normalization_factor
151-
result[[1, 3]] = (result[[1, 3]] * height) / normalization_factor
157+
result[:, [0, 2]] = (result[:, [0, 2]] * width) / normalization_factor
158+
result[:, [1, 3]] = (result[:, [1, 3]] * height) / normalization_factor
152159

153160
return result
154161

supervision/detection/vlm.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -538,26 +538,24 @@ def from_google_gemini_2_0(
538538
return np.empty((0, 4)), None, np.empty((0,), dtype=str)
539539

540540
labels = []
541-
boxes_list = []
541+
xyxy = []
542542

543543
for item in data:
544544
if "box_2d" not in item or "label" not in item:
545545
continue
546546
labels.append(item["label"])
547547
box = item["box_2d"]
548548
# Gemini bbox order is [y_min, x_min, y_max, x_max]
549-
boxes_list.append(
550-
denormalize_boxes(
551-
np.array([box[1], box[0], box[3], box[2]]).astype(np.float64),
552-
resolution_wh=(w, h),
553-
normalization_factor=1000,
554-
)
555-
)
549+
xyxy.append([box[1], box[0], box[3], box[2]])
556550

557-
if not boxes_list:
551+
if len(xyxy) == 0:
558552
return np.empty((0, 4)), None, np.empty((0,), dtype=str)
559553

560-
xyxy = np.array(boxes_list)
554+
xyxy = denormalize_boxes(
555+
np.array(xyxy, dtype=np.float64),
556+
resolution_wh=(w, h),
557+
normalization_factor=1000,
558+
)
561559
class_name = np.array(labels)
562560
class_id = None
563561

@@ -649,10 +647,10 @@ def from_google_gemini_2_5(
649647
box = item["box_2d"]
650648
# Gemini bbox order is [y_min, x_min, y_max, x_max]
651649
absolute_bbox = denormalize_boxes(
652-
np.array([box[1], box[0], box[3], box[2]]).astype(np.float64),
650+
np.array([[box[1], box[0], box[3], box[2]]]).astype(np.float64),
653651
resolution_wh=(w, h),
654652
normalization_factor=1000,
655-
)
653+
)[0]
656654
boxes_list.append(absolute_bbox)
657655

658656
if "mask" in item:
@@ -735,7 +733,7 @@ def from_google_gemini_2_5(
735733
def from_moondream(
736734
result: dict,
737735
resolution_wh: tuple[int, int],
738-
) -> tuple[np.ndarray]:
736+
) -> np.ndarray:
739737
"""
740738
Parse and scale bounding boxes from moondream JSON output.
741739
@@ -773,7 +771,7 @@ def from_moondream(
773771
if "objects" not in result or not isinstance(result["objects"], list):
774772
return np.empty((0, 4), dtype=float)
775773

776-
denormalize_xyxy = []
774+
xyxy = []
777775

778776
for item in result["objects"]:
779777
if not all(k in item for k in ["x_min", "y_min", "x_max", "y_max"]):
@@ -784,14 +782,12 @@ def from_moondream(
784782
x_max = item["x_max"]
785783
y_max = item["y_max"]
786784

787-
denormalize_xyxy.append(
788-
denormalize_boxes(
789-
np.array([x_min, y_min, x_max, y_max]).astype(np.float64),
790-
resolution_wh=(w, h),
791-
)
792-
)
785+
xyxy.append([x_min, y_min, x_max, y_max])
793786

794-
if not denormalize_xyxy:
787+
if len(xyxy) == 0:
795788
return np.empty((0, 4))
796789

797-
return np.array(denormalize_xyxy, dtype=float)
790+
return denormalize_boxes(
791+
np.array(xyxy).astype(np.float64),
792+
resolution_wh=(w, h),
793+
)

test/detection/utils/test_boxes.py

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@
55
import numpy as np
66
import pytest
77

8-
from supervision.detection.utils.boxes import clip_boxes, move_boxes, scale_boxes
8+
from supervision.detection.utils.boxes import (
9+
clip_boxes,
10+
denormalize_boxes,
11+
move_boxes,
12+
scale_boxes,
13+
)
914

1015

1116
@pytest.mark.parametrize(
@@ -142,3 +147,88 @@ def test_scale_boxes(
142147
with exception:
143148
result = scale_boxes(xyxy=xyxy, factor=factor)
144149
assert np.array_equal(result, expected_result)
150+
151+
152+
@pytest.mark.parametrize(
153+
"xyxy, resolution_wh, normalization_factor, expected_result, exception",
154+
[
155+
(
156+
np.empty(shape=(0, 4)),
157+
(1280, 720),
158+
1.0,
159+
np.empty(shape=(0, 4)),
160+
DoesNotRaise(),
161+
), # empty array
162+
(
163+
np.array([[0.1, 0.2, 0.5, 0.6]]),
164+
(1280, 720),
165+
1.0,
166+
np.array([[128.0, 144.0, 640.0, 432.0]]),
167+
DoesNotRaise(),
168+
), # single box with default normalization
169+
(
170+
np.array([[0.1, 0.2, 0.5, 0.6], [0.3, 0.4, 0.7, 0.8]]),
171+
(1280, 720),
172+
1.0,
173+
np.array([[128.0, 144.0, 640.0, 432.0], [384.0, 288.0, 896.0, 576.0]]),
174+
DoesNotRaise(),
175+
), # two boxes with default normalization
176+
(
177+
np.array(
178+
[[0.1, 0.2, 0.5, 0.6], [0.3, 0.4, 0.7, 0.8], [0.2, 0.1, 0.6, 0.5]]
179+
),
180+
(1280, 720),
181+
1.0,
182+
np.array(
183+
[
184+
[128.0, 144.0, 640.0, 432.0],
185+
[384.0, 288.0, 896.0, 576.0],
186+
[256.0, 72.0, 768.0, 360.0],
187+
]
188+
),
189+
DoesNotRaise(),
190+
), # three boxes - regression test for issue #1959
191+
(
192+
np.array([[10.0, 20.0, 50.0, 60.0]]),
193+
(100, 200),
194+
100.0,
195+
np.array([[10.0, 40.0, 50.0, 120.0]]),
196+
DoesNotRaise(),
197+
), # single box with custom normalization factor
198+
(
199+
np.array([[10.0, 20.0, 50.0, 60.0], [30.0, 40.0, 70.0, 80.0]]),
200+
(100, 200),
201+
100.0,
202+
np.array([[10.0, 40.0, 50.0, 120.0], [30.0, 80.0, 70.0, 160.0]]),
203+
DoesNotRaise(),
204+
), # two boxes with custom normalization factor
205+
(
206+
np.array([[0.0, 0.0, 1.0, 1.0]]),
207+
(1920, 1080),
208+
1.0,
209+
np.array([[0.0, 0.0, 1920.0, 1080.0]]),
210+
DoesNotRaise(),
211+
), # full frame box
212+
(
213+
np.array([[0.5, 0.5, 0.5, 0.5]]),
214+
(640, 480),
215+
1.0,
216+
np.array([[320.0, 240.0, 320.0, 240.0]]),
217+
DoesNotRaise(),
218+
), # zero-area box (point)
219+
],
220+
)
221+
def test_denormalize_boxes(
222+
xyxy: np.ndarray,
223+
resolution_wh: tuple[int, int],
224+
normalization_factor: float,
225+
expected_result: np.ndarray,
226+
exception: Exception,
227+
) -> None:
228+
with exception:
229+
result = denormalize_boxes(
230+
xyxy=xyxy,
231+
resolution_wh=resolution_wh,
232+
normalization_factor=normalization_factor,
233+
)
234+
assert np.allclose(result, expected_result)

0 commit comments

Comments
 (0)