1
- """Generate action descriptions."""
1
+ """Generate natural language descriptions from actions."""
2
2
3
3
from pprint import pformat
4
-
5
4
from loguru import logger
6
- import cv2
5
+ from PIL import Image , ImageDraw
7
6
import numpy as np
8
7
9
8
from openadapt .db import crud
9
+ from openadapt .plotting import get_font
10
+ from openadapt .utils import get_scaling_factor
11
+
12
+ scaling_factor = get_scaling_factor ()
10
13
11
14
12
15
def embed_description (
13
- image : np . ndarray ,
16
+ image : Image . Image ,
14
17
description : str ,
15
- x : int = None ,
16
- y : int = None ,
17
- ) -> np . ndarray :
18
+ x : int = 0 ,
19
+ y : int = 0 ,
20
+ ) -> Image . Image :
18
21
"""Embed a description into an image at the specified location.
19
22
20
23
Args:
21
- image (np.ndarray ): The image to annotate.
24
+ image (Image.Image): The image to annotate.
22
25
description (str): The text to embed.
23
- x (int, optional): The x-coordinate. Defaults to None (centered) .
24
- y (int, optional): The y-coordinate. Defaults to None (centered) .
26
+ x (int, optional): The x-coordinate. Defaults to 0.
27
+ y (int, optional): The y-coordinate. Defaults to 0.
25
28
26
29
Returns:
27
- np.ndarray : The annotated image.
30
+ Image.Image: The annotated image.
28
31
"""
29
- font = cv2 .FONT_HERSHEY_SIMPLEX
30
- font_scale = 1
31
- font_color = (255 , 255 , 255 ) # White
32
- line_type = 1
32
+ draw = ImageDraw .Draw (image )
33
+ font_size = 30 # Set font size (2x the default size)
34
+ font = get_font ("Arial.ttf" , font_size )
33
35
34
36
# Split description into multiple lines
35
- max_width = 60 # Maximum characters per line
37
+ max_width = image . width
36
38
words = description .split ()
37
39
lines = []
38
40
current_line = []
@@ -45,36 +47,28 @@ def embed_description(
45
47
if current_line :
46
48
lines .append (" " .join (current_line ))
47
49
48
- # Default to center if coordinates are not provided
49
- if x is None or y is None :
50
- x = image .shape [1 ] // 2
51
- y = image .shape [0 ] // 2
50
+ # Adjust coordinates for scaling factor
51
+ x = int (x * scaling_factor )
52
+ y = int (y * scaling_factor )
52
53
53
- # Draw semi-transparent background and text
54
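+ # Calculate text dimensions, then draw each line's background box and text. NOTE(review): the fill's alpha (128) is presumably only blended when the Draw object is created in "RGBA" mode, so confirm the background is really semi-transparent.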
54
55
for i , line in enumerate (lines ):
55
- text_size , _ = cv2 .getTextSize (line , font , font_scale , line_type )
56
- text_x = max (0 , min (x - text_size [0 ] // 2 , image .shape [1 ] - text_size [0 ]))
57
- text_y = y + i * 20
56
+ bbox = draw .textbbox ((0 , 0 ), line , font = font )
57
+ text_width , text_height = bbox [2 ] - bbox [0 ], bbox [3 ] - bbox [1 ]
58
+ text_x = max (0 , min (x - text_width // 2 , image .width - text_width ))
59
+ text_y = y + i * text_height
58
60
59
61
# Draw background
60
- cv2 .rectangle (
61
- image ,
62
- (text_x - 15 , text_y - 25 ),
63
- (text_x + text_size [0 ] + 15 , text_y + 15 ),
64
- (0 , 0 , 0 ),
65
- - 1 ,
62
+ background_box = (
63
+ text_x - 15 ,
64
+ text_y - 5 ,
65
+ text_x + text_width + 15 ,
66
+ text_y + text_height + 5 ,
66
67
)
68
+ draw .rectangle (background_box , fill = (0 , 0 , 0 , 128 ))
67
69
68
70
# Draw text
69
- cv2 .putText (
70
- image ,
71
- line ,
72
- (text_x , text_y ),
73
- font ,
74
- font_scale ,
75
- font_color ,
76
- line_type ,
77
- )
71
+ draw .text ((text_x , text_y ), line , fill = (255 , 255 , 255 ), font = font )
78
72
79
73
return image
80
74
@@ -88,25 +82,22 @@ def main() -> None:
88
82
for action in action_events :
89
83
description , image = action .prompt_for_description (return_image = True )
90
84
91
- # Convert image to numpy array for OpenCV compatibility
92
- image = np .array (image )
85
+ # Convert image to PIL.Image for compatibility
86
+ image = Image . fromarray ( np .array (image ) )
93
87
94
88
if action .mouse_x is not None and action .mouse_y is not None :
95
89
# Use the mouse coordinates for mouse events
96
90
annotated_image = embed_description (
97
91
image ,
98
92
description ,
99
- x = int (action .mouse_x ) * 2 ,
100
- y = int (action .mouse_y ) * 2 ,
101
93
)
102
94
else :
103
95
# Center the text for other events
104
96
annotated_image = embed_description (image , description )
105
97
106
98
logger .info (f"{ action = } " )
107
99
logger .info (f"{ description = } " )
108
- cv2 .imshow ("Annotated Image" , annotated_image )
109
- cv2 .waitKey (0 )
100
+ annotated_image .show () # Opens the annotated image using the default viewer
110
101
descriptions .append (description )
111
102
112
103
logger .info (f"descriptions=\n { pformat (descriptions )} " )
0 commit comments