7
7
import copy
8
8
import io
9
9
import sys
10
+ import textwrap
10
11
11
12
from bs4 import BeautifulSoup
12
13
from pynput import keyboard
16
17
17
18
from openadapt .config import config
18
19
from openadapt .custom_logger import logger
20
+ from openadapt .drivers import anthropic
19
21
from openadapt .db import db
20
22
from openadapt .privacy .base import ScrubbingProvider , TextScrubbingMixin
21
23
from openadapt .privacy .providers import ScrubProvider
@@ -110,6 +112,9 @@ def processed_action_events(self) -> list:
110
112
if not self ._processed_action_events :
111
113
session = crud .get_new_session (read_only = True )
112
114
self ._processed_action_events = events .get_events (session , self )
115
+ # Preload screenshots to avoid lazy loading later
116
+ for event in self ._processed_action_events :
117
+ event .screenshot
113
118
return self ._processed_action_events
114
119
115
120
def scrub (self , scrubber : ScrubbingProvider ) -> None :
@@ -125,6 +130,7 @@ class ActionEvent(db.Base):
125
130
"""Class representing an action event in the database."""
126
131
127
132
__tablename__ = "action_event"
133
+ _repr_ignore_attrs = ["reducer_names" ]
128
134
129
135
_segment_description_separator = ";"
130
136
@@ -333,6 +339,11 @@ def canonical_text(self, value: str) -> None:
333
339
if not value == self .canonical_text :
334
340
logger .warning (f"{ value = } did not match { self .canonical_text = } " )
335
341
342
@property
def raw_text(self) -> str:
    """Return a string containing the raw action text (without separators).

    Returns:
        str: ``self.text`` with every occurrence of
            ``config.ACTION_TEXT_SEP`` removed, or an empty string when the
            event carries no text.
    """
    text = self.text
    # An event may carry no text; calling ``.split`` on a missing value would
    # raise AttributeError from inside a property, which ``hasattr`` and
    # ``__getattr__`` fallbacks silently swallow. Report "no text" explicitly.
    if not text:
        return ""
    return "".join(text.split(config.ACTION_TEXT_SEP))
346
+
336
347
def __str__ (self ) -> str :
337
348
"""Return a string representation of the action event."""
338
349
attr_names = [
@@ -544,6 +555,75 @@ def next_event(self) -> Union["ActionEvent", None]:
544
555
545
556
return None
546
557
558
def prompt_for_description(
    self, return_image: bool = False
) -> Union[str, tuple]:
    """Use the Anthropic API to describe what is happening in the action event.

    Events that carry text are described locally as a "Type ..." action and
    never reach the API. Other events must be a "move", a "scroll", or have a
    name containing "click"; for those the event is rendered with
    ``display_event`` and the model is asked to name the highlighted user
    interface element.

    Args:
        return_image (bool): Whether to also return the image rendered for
            the event (the one sent to the model for non-text events).

    Returns:
        str: The description of the action event, when ``return_image`` is
            False.
        tuple: ``(description, image)`` when ``return_image`` is True.

    Raises:
        ValueError: If the event has no text and its name is not one of the
            supported mouse actions.
    """
    has_text = bool(self.text)
    name = self.name

    # Reject unsupported events up front, before paying for a rendering and
    # an API round trip whose result could not be used anyway.
    if not has_text:
        is_click = bool(name) and "click" in name
        if name not in ("move", "scroll") and not is_click:
            raise ValueError(f"Unhandled { self .name = } {self} ")

    # The image is only needed when it is sent to the model or handed back
    # to the caller; skip the rendering for plain typed-text descriptions.
    image = None
    if return_image or not has_text:
        # Imported locally, as in the original implementation.
        from openadapt.plotting import display_event

        image = display_event(
            self,
            marker_width_pct=0.05,
            marker_height_pct=0.05,
            darken_outside=0.7,
            display_text=False,
            marker_fill_transparency=0,
        )

    if has_text:
        description = f"Type '{self.raw_text} '"
    else:
        prompt = (
            "What user interface element is contained in the highlighted circle "
            "of the image?"
        )
        # TODO: disambiguate
        system_prompt = textwrap.dedent(
            """
            Briefly describe the user interface element in the screenshot at the
            highlighted location.
            For example:
            - "OK button"
            - "URL bar"
            - "Down arrow"
            DO NOT DESCRIBE ANYTHING OUTSIDE THE HIGHLIGHTED AREA.
            Do not append anything like "is contained within the highlighted circle
            in the calculator interface." Just name the user interface element.
            """
        )

        logger.info(f"system_prompt=\n {system_prompt} ")
        logger.info(f"prompt=\n {prompt} ")

        # Call the Anthropic API
        element = anthropic.prompt(
            prompt=prompt,
            system_prompt=system_prompt,
            images=[image],
        )

        if name == "move":
            description = f"Move mouse to '{element} '"
        elif name == "scroll":
            # TODO: "scroll to", dx/dy
            description = f"Scroll mouse on '{element} '"
        else:
            # Only click-like names reach this point (validated above).
            description = (
                f"{self.mouse_button_name.capitalize()} {self.name} '{element} '"
            )

    if return_image:
        return description, image
    return description
626
+
547
627
548
628
class WindowEvent (db .Base ):
549
629
"""Class representing a window event in the database."""
0 commit comments