
Commit b912a56

Author: Agustín Castro
Use motion estimator and mask generator when using videos
1 parent 2daf7ba commit b912a56

File tree

1 file changed: +184 -21 lines changed

norfair/common_reference_ui.py

Lines changed: 184 additions & 21 deletions
@@ -45,10 +45,54 @@ def set_reference(
     reference: str,
     footage: str,
     transformation_getter: TransformationGetter = None,
-    detector=None,
+    mask_generator=None,
     desired_size=700,
     motion_estimator=None,
 ):
+    """
+    Get a transformation relating the coordinates of the footage absolute frame (first image of the footage) to the reference absolute frame (first image of the reference).
+
+    UI usage:
+
+    Creates a UI to annotate matching points in the reference and the footage, and to estimate the transformation from them.
+    To add a pair of matching points, click one point in the footage window and one in the reference window, then select "Add".
+    To remove a pair, select the corresponding point at the bottom left corner and select "Remove".
+
+    If either the footage or the reference is a video, you can jump to future frames to pick matching points.
+    For example, to jump 215 frames ahead in the footage, write 215 next to 'Frames to skip (footage)' and select "Skip frames".
+    A motion estimator can be used to relate the coordinates of the frame you currently see (in either the footage or the reference) to the coordinates of its first frame.
+
+    Once a transformation has been estimated, you can test it:
+    select 'Test' mode, pick a point in either the reference or the footage, and see the associated point appear in the other window.
+    You can keep adding associated points until you are satisfied with the estimated transformation.
+
+    Arguments:
+     - reference: str
+        Path to the reference image or video.
+
+     - footage: str
+        Path to the footage image or video.
+
+     - transformation_getter: TransformationGetter, optional
+        TransformationGetter defining the type of transformation to fit between reference and footage.
+        Since the transformation can be far from the identity (the perspectives of the footage and the reference can differ immensely),
+        and since outliers should be rare given that a human is picking the points, a high ransac_reproj_threshold (~ 1000) is recommended.
+
+     - mask_generator: optional
+        Function that creates a mask (np.ndarray) from a video frame (np.ndarray). The mask is passed to the corresponding
+        MotionEstimator to avoid sampling points within the mask.
+
+     - desired_size: int, optional
+        Maximum size (in pixels) of the clickable windows in the UI.
+
+     - motion_estimator: MotionEstimator, optional
+        When the footage or the reference is a video, you can provide a MotionEstimator to relate the coordinates of all of its frames.
+        The motion estimator is only useful if the camera can move in the footage video or in the reference video; otherwise, avoid using it.
+
+    returns: CoordinatesTransformation instance
+        The provided transformation_getter fits a transformation from the reference (as 'absolute') to the footage (as 'relative').
+        CoordinatesTransformation.abs_to_rel maps coordinates in the first frame of the reference to coordinates in the first frame of the footage.
+        CoordinatesTransformation.rel_to_abs maps coordinates in the first frame of the footage to coordinates in the first frame of the reference.
+    """
 
     global window
 
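For context (not part of the commit), here is a hedged usage sketch of the new signature. MotionEstimator and HomographyTransformationGetter come from norfair.camera_motion; the file paths are hypothetical, and the threshold value follows the docstring's own recommendation.

from norfair.camera_motion import HomographyTransformationGetter, MotionEstimator
from norfair.common_reference_ui import set_reference

transformation = set_reference(
    reference="court_diagram.png",  # hypothetical reference image
    footage="match.mp4",            # hypothetical footage video
    transformation_getter=HomographyTransformationGetter(
        ransac_reproj_threshold=1000  # high threshold, as the docstring recommends
    ),
    motion_estimator=MotionEstimator(),  # only worthwhile if the camera moves
)

# transformation.abs_to_rel: reference (first frame) -> footage (first frame)
# transformation.rel_to_abs: footage (first frame) -> reference (first frame)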
@@ -132,9 +176,22 @@ def estimate_transformation(points):
         return None
 
     def test_transformation(
-        change_of_coordinates, canvas, point, original_size, canvas_size
+        change_of_coordinates,
+        canvas,
+        point,
+        original_size,
+        canvas_size,
+        motion_transformation=None,
     ):
         point_in_new_coordinates = change_of_coordinates(np.array([point]))[0]
+
+        try:
+            point_in_new_coordinates = motion_transformation.abs_to_rel(
+                np.array([point_in_new_coordinates])
+            )[0]
+        except AttributeError:
+            pass
+
         point_in_canvas_coordinates = np.multiply(
             point_in_new_coordinates,
             np.array(
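The try/except AttributeError above is how the code tolerates a missing motion transformation (None is stored for still images, and calling abs_to_rel on None raises AttributeError). A minimal equivalent sketch, with names assumed to mirror the code:

import numpy as np

def project_for_test(change_of_coordinates, point, motion_transformation=None):
    # Map the clicked point into the other window's first (absolute) frame.
    projected = change_of_coordinates(np.array([point]))[0]
    # If that window shows a video with camera motion, carry the point
    # forward to the currently displayed frame.
    if motion_transformation is not None:
        projected = motion_transformation.abs_to_rel(np.array([projected]))[0]
    return projected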
@@ -159,7 +216,7 @@ def draw_point_in_canvas(canvas, point, color="green"):
             tags="myPoint",
         )
 
-    ######### MAKSE SUBBLOCK TO FINISH
+    ######### MAKE SUBBLOCK TO FINISH
 
     frame_options_finish = tk.Frame(master=frame_options)
 
@@ -201,15 +258,56 @@ def handle_finish():
    ###### MAKE SUBBLOCK TO SEE POINTS AND CHOOSE THEM
    def handle_mark_annotation(key):
        def handle_annotation(event):
+           global skipper
+           global reference_original_size
+           global reference_canvas_size
+           global footage_original_size
+           global footage_canvas_size
+
            points[key]["marked"] = not points[key]["marked"]

            if points[key]["marked"]:
                points[key]["button"].configure(fg="black", highlightbackground="red")
+
+               try:
+                   footage_point_in_rel_coords = skipper["footage"][
+                       "motion_transformation"
+                   ].abs_to_rel(np.array([points[key]["footage"]]))[0]
+                   footage_point_in_rel_coords = np.multiply(
+                       footage_point_in_rel_coords,
+                       np.array(
+                           [
+                               footage_canvas_size[0] / footage_original_size[0],
+                               footage_canvas_size[1] / footage_original_size[1],
+                           ]
+                       ),
+                   ).astype(int)
+               except AttributeError:
+                   footage_point_in_rel_coords = points[key]["footage_canvas"]
+                   pass
+
+               try:
+                   reference_point_in_rel_coords = skipper["reference"][
+                       "motion_transformation"
+                   ].abs_to_rel(np.array([points[key]["reference"]]))[0]
+                   reference_point_in_rel_coords = np.multiply(
+                       reference_point_in_rel_coords,
+                       np.array(
+                           [
+                               reference_canvas_size[0] / reference_original_size[0],
+                               reference_canvas_size[1] / reference_original_size[1],
+                           ]
+                       ),
+                   ).astype(int)
+               except AttributeError:
+                   reference_point_in_rel_coords = points[key]["reference_canvas"]
+                   pass
+
                draw_point_in_canvas(
-                   canvas_footage, points[key]["footage_canvas"], color="red"
+                   canvas_footage, footage_point_in_rel_coords, color="red"
                )
                draw_point_in_canvas(
-                   canvas_reference, points[key]["reference_canvas"], color="red"
+                   canvas_reference, reference_point_in_rel_coords, color="red"
                )
            else:
                points[key]["button"].configure(
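(Note: the second try block originally passed points[key]["footage"] to the reference's motion transformation; it is corrected to points[key]["reference"] above.) The two np.multiply blocks repeat one idea: rescale a point from original-frame pixels into the resized canvas. A hypothetical helper showing just that step, using the same (width, height) pair convention as the code:

import numpy as np

def to_canvas_coords(point, original_size, canvas_size):
    # original_size and canvas_size are (width, height) pairs.
    scale = np.array(
        [canvas_size[0] / original_size[0], canvas_size[1] / original_size[1]]
    )
    return np.multiply(point, scale).astype(int)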
@@ -249,6 +347,8 @@ def handle_annotation(event):
     footage_point = None
     footage_point_canvas = None
 
+    motion_estimator_footage = None
+    motion_transformation = None
     try:
         image = Image.open(footage)
         video = None
@@ -258,7 +358,15 @@ def handle_annotation(event):
        video = Video(input_path=footage)
        total_frames = int(video.video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.output_fps
-       image = Image.fromarray(cv2.cvtColor(next(video.__iter__()), cv2.COLOR_BGR2RGB))
+       image = cv2.cvtColor(next(video.__iter__()), cv2.COLOR_BGR2RGB)
+       if motion_estimator is not None:
+           motion_estimator_footage = deepcopy(motion_estimator)
+           if mask_generator is not None:
+               mask = mask_generator(image)
+           else:
+               mask = None
+           motion_transformation = motion_estimator_footage.update(image, mask)
+       image = Image.fromarray(image)
 
    footage_original_width = image.width
    footage_original_height = image.height
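Note that mask_generator receives the frame as an np.ndarray (the conversion to a PIL Image happens afterwards). A hypothetical generator that hides a static overlay, such as a scoreboard, from the motion estimator; the masked region is an assumption:

import numpy as np

def mask_generator(frame):
    # frame is an np.ndarray of shape (height, width, channels);
    # zeros mark pixels the MotionEstimator should not sample.
    mask = np.ones(frame.shape[:2], dtype=frame.dtype)
    mask[:60, :] = 0  # e.g. ignore a banner across the top of the frame
    return mask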
@@ -284,13 +392,27 @@ def reference_coord_chosen_in_footage(event):
         global canvas_reference
         global reference_original_size
         global reference_canvas_size
-        footage_point = (
-            np.around(event.x * (footage_original_width / footage_canvas_width), 1),
-            np.around(event.y * (footage_original_height / footage_canvas_height), 1),
-        )
+        global skipper
+
         footage_point_canvas = (event.x, event.y)
         draw_point_in_canvas(canvas_footage, footage_point_canvas)
-        print("Footage window clicked at: ", footage_point)
+
+        footage_point = np.array(
+            [
+                event.x * (footage_original_width / footage_canvas_width),
+                event.y * (footage_original_height / footage_canvas_height),
+            ]
+        )
+        print("Footage window clicked at: ", footage_point.round(1))
+
+        try:
+            footage_point = skipper["footage"]["motion_transformation"].rel_to_abs(
+                np.array([footage_point])
+            )[0]
+        except AttributeError:
+            pass
+
+        footage_point = footage_point.round(1)
 
         if not mode_annotate:
             if transformation is not None:
@@ -300,6 +422,7 @@ def reference_coord_chosen_in_footage(event):
                     footage_point,
                     reference_original_size,
                     reference_canvas_size,
+                    skipper["reference"]["motion_transformation"],
                 )
             else:
                 print("Can't test the transformation yet, still need more points")
@@ -314,12 +437,15 @@ def reference_coord_chosen_in_footage(event):
        "fps": fps,
        "button_skip": None,
        "entry_skip": None,
-       "motion_estimator": None,
+       "motion_estimator": motion_estimator_footage,
+       "motion_transformation": motion_transformation,
        "canvas": canvas_footage,
        "image_container": footage_image_container,
        "current_frame_label": None,
    }

+   motion_estimator_reference = None
+   motion_transformation = None
    try:
        image = Image.open(reference)
        video = None
@@ -329,7 +455,16 @@ def reference_coord_chosen_in_footage(event):
        video = Video(input_path=reference)
        total_frames = int(video.video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.output_fps
-       image = Image.fromarray(cv2.cvtColor(next(video.__iter__()), cv2.COLOR_BGR2RGB))
+       image = cv2.cvtColor(next(video.__iter__()), cv2.COLOR_BGR2RGB)
+       if motion_estimator is not None:
+           motion_estimator_reference = deepcopy(motion_estimator)
+           if mask_generator is not None:
+               mask = mask_generator(image)
+           else:
+               mask = None
+           motion_transformation = motion_estimator_reference.update(image, mask)
+
+       image = Image.fromarray(image)
 
    reference_original_width = image.width
    reference_original_height = image.height
@@ -355,16 +490,27 @@ def reference_coord_chosen_in_reference(event):
         global canvas_footage
         global footage_original_size
         global footage_canvas_size
+        global skipper
 
-        reference_point = (
-            np.around(event.x * (reference_original_width / reference_canvas_width), 1),
-            np.around(
-                event.y * (reference_original_height / reference_canvas_height), 1
-            ),
-        )
         reference_point_canvas = (event.x, event.y)
         draw_point_in_canvas(canvas_reference, reference_point_canvas)
-        print("Reference window clicked at: ", reference_point)
+
+        reference_point = np.array(
+            [
+                event.x * (reference_original_width / reference_canvas_width),
+                event.y * (reference_original_height / reference_canvas_height),
+            ]
+        )
+        print("Reference window clicked at: ", reference_point.round(1))
+
+        try:
+            reference_point = skipper["reference"]["motion_transformation"].rel_to_abs(
+                np.array([reference_point])
+            )[0]
+        except AttributeError:
+            pass
+
+        reference_point = reference_point.round(1)
 
         if not mode_annotate:
             if transformation is not None:
@@ -374,6 +520,7 @@ def reference_coord_chosen_in_reference(event):
                     reference_point,
                     footage_original_size,
                     footage_canvas_size,
+                    skipper["footage"]["motion_transformation"],
                 )
             else:
                 print("Can't test the transformation yet, still need more points")
@@ -388,7 +535,8 @@ def reference_coord_chosen_in_reference(event):
        "fps": fps,
        "button_skip": None,
        "entry_skip": None,
-       "motion_estimator": None,
+       "motion_estimator": motion_estimator_reference,
+       "motion_transformation": motion_transformation,
        "canvas": canvas_reference,
        "image_container": reference_image_container,
        "current_frame_label": None,
@@ -424,6 +572,9 @@ def handle_skip_frame(event):
            return
        video = skipper[video_type]["video"]
        change_image = False
+       motion_estimator = skipper[video_type]["motion_estimator"]
+       motion_transformation = skipper[video_type]["motion_transformation"]
+
        while (frames_to_skip > 0) and (
            skipper[video_type]["current_frame"]
            < skipper[video_type]["total_frames"]
@@ -434,6 +585,18 @@ def handle_skip_frame(event):
 
            image = next(video.__iter__())
 
+           if motion_estimator is not None:
+               if mask_generator is not None:
+                   mask = mask_generator(image)
+               else:
+                   mask = None
+               motion_transformation = motion_estimator.update(
+                   np.array(image), mask
+               )
+
+       skipper[video_type]["motion_estimator"] = motion_estimator
+       skipper[video_type]["motion_transformation"] = motion_transformation
+
        if change_image:
            image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            image.thumbnail((desired_size, desired_size))
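Each skipped frame is still fed to the MotionEstimator, so the stored transformation stays aligned with whatever frame ends up on screen. A minimal sketch of that loop with assumed names:

import numpy as np

def skip_frames(video_iter, n_frames, motion_estimator=None, mask_generator=None):
    frame, motion_transformation = None, None
    for _ in range(n_frames):
        frame = next(video_iter)  # decode even the frames that are never displayed
        if motion_estimator is not None:
            mask = mask_generator(frame) if mask_generator is not None else None
            motion_transformation = motion_estimator.update(np.array(frame), mask)
    return frame, motion_transformation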
