|
2 | 2 |
|
3 | 3 | Hacked together by Ross Wightman
|
4 | 4 | """
|
5 |
| -import torch |
6 |
| -from PIL import Image |
7 |
| -import numpy as np |
8 | 5 | import random
|
9 | 6 | import math
|
| 7 | +from copy import deepcopy |
| 8 | + |
| 9 | +from PIL import Image |
| 10 | +import numpy as np |
| 11 | +import torch |
10 | 12 |
|
11 | 13 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
|
12 | 14 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
|
@@ -91,13 +93,13 @@ def __call__(self, img, anno: dict):
|
91 | 93 | new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color)
|
92 | 94 | interp_method = _pil_interp(self.interpolation)
|
93 | 95 | img = img.resize((scaled_w, scaled_h), interp_method)
|
94 |
| - new_img.paste(img) |
| 96 | + new_img.paste(img) # pastes at 0,0 (upper-left corner) |
95 | 97 |
|
96 | 98 | if 'bbox' in anno:
|
97 |
| - # FIXME haven't tested this path since not currently using dataset annotations for train/eval |
98 | 99 | bbox = anno['bbox']
|
99 | 100 | bbox[:, :4] *= img_scale
|
100 |
| - clip_boxes_(bbox, (scaled_h, scaled_w)) |
| 101 | + bbox_bound = (min(scaled_h, self.target_size[0]), min(scaled_w, self.target_size[1])) |
| 102 | + clip_boxes_(bbox, bbox_bound) # crop to bounds of target image or letter-box, whichever is smaller |
101 | 103 | valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1)
|
102 | 104 | anno['bbox'] = bbox[valid_indices, :]
|
103 | 105 | anno['cls'] = anno['cls'][valid_indices]
|
@@ -151,15 +153,15 @@ def __call__(self, img, anno: dict):
|
151 | 153 | right, lower = min(scaled_w, offset_x + self.target_size[1]), min(scaled_h, offset_y + self.target_size[0])
|
152 | 154 | img = img.crop((offset_x, offset_y, right, lower))
|
153 | 155 | new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color)
|
154 |
| - new_img.paste(img) |
| 156 | + new_img.paste(img) # pastes at 0,0 (upper-left corner) |
155 | 157 |
|
156 | 158 | if 'bbox' in anno:
|
157 |
| - # FIXME not fully tested |
158 |
| - bbox = anno['bbox'].copy() # FIXME copy for debugger inspection, back to inplace |
| 159 | + bbox = anno['bbox'] # for convenience, modifies in-place |
159 | 160 | bbox[:, :4] *= img_scale
|
160 | 161 | box_offset = np.stack([offset_y, offset_x] * 2)
|
161 | 162 | bbox -= box_offset
|
162 |
| - clip_boxes_(bbox, (scaled_h, scaled_w)) |
| 163 | + bbox_bound = (min(scaled_h, self.target_size[0]), min(scaled_w, self.target_size[1])) |
| 164 | + clip_boxes_(bbox, bbox_bound) # crop to bounds of target image or letter-box, whichever is smaller |
163 | 165 | valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1)
|
164 | 166 | anno['bbox'] = bbox[valid_indices, :]
|
165 | 167 | anno['cls'] = anno['cls'][valid_indices]
|
|
0 commit comments