feat: add capture.py - also fixes audio recording (#362)

0dm · abrichr · web-flow · commit 581b9b831fe2 · 2023-08-29T03:26:21.000-04:00
* merge * Create capture.py * Update capture.py * Update capture.py * it's finally fixed * add dependencies * comment * move code + use config.CAPTURE_DIR_PATH * remove debug lines * Update capture.py * OpenAdaptCapture -> Capture * add camera * Let's have this off by default. * hotfix * fix * linting * Create capture.py * windows * cleanup + lint * Update _windows.py * add audio + new windows recording * screen_recorder.free_resources() * Update _windows.py * isort * add playback recording * Update replay.py * Update replay.py * Update README.md * Revert "Update README.md" This reverts commit 7064103. * Update README.md * Revert "Revert "Update README.md"" This reverts commit 0fe8156. * Update README.md * run pre-commit * Update pyproject.toml * Update openadapt/replay.py * Update openadapt/replay.py * Update openadapt/replay.py * Update replay.py * update poetry.lock --------- Co-authored-by: Richard Abrich <richard.abrich@gmail.com> Co-authored-by: Richard Abrich <richard.abrich@mldsai.com>
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-[Join us on Slack](https://join.slack.com/t/mldsai/shared_invite/zt-1uf94nn7r-qcQnS~hinLPKftUapNzbuw)
+[Join us on Slack](https://join.slack.com/t/mldsai/shared_invite/zt-1uf94nn7r-qcQnS~hinLPKftUapNzbuw)  
 
 # OpenAdapt: AI-First Process Automation with Transformers
 
diff --git a/openadapt/capture/__init__.py b/openadapt/capture/__init__.py
@@ -0,0 +1,44 @@
+"""Capture the screen, audio, and camera as a video on macOS and Windows.
+
+Package: openadapt.capture (this file is its ``__init__.py``).
+"""
+import sys
+
+# Reject unsupported platforms up front, then import the matching backend.
+# Each backend module defines its own ``Capture`` class.
+if sys.platform not in ("darwin", "win32"):
+    raise Exception(f"Unsupported platform: {sys.platform}")
+
+if sys.platform == "win32":
+    from . import _windows as impl
+else:
+    from . import _macos as impl
+
+# Single capture instance shared by the module-level helper functions.
+device = impl.Capture()
+
+
+def get_capture() -> impl.Capture:
+    """Get the capture object.
+
+    Returns:
+        Capture: The module-level ``device`` instance, created when this
+            module is imported from the platform-specific implementation.
+    """
+    return device
+
+
+def start(audio: bool = False, camera: bool = False) -> None:
+    """Start the capture on the module-level ``device``.
+
+    Args:
+        audio (bool, optional): Whether to capture audio (default: False).
+        camera (bool, optional): Whether to capture the camera (default: False).
+    """
+    device.start(audio=audio, camera=camera)
+
+
+def stop() -> None:
+    """Stop the capture running on the module-level ``device``."""
+    device.stop()
+
+
+def test() -> None:
+    """Run a manual smoke test: record until the user presses enter.
+
+    The capture is started with the backend's default options and is always
+    stopped, even if the prompt is interrupted (e.g. Ctrl-C or EOF on stdin),
+    so that a recording is never left running.
+    """
+    device.start()
+    try:
+        input("Press enter to stop")
+    finally:
+        device.stop()
+
+
+# Run the manual test when executed as a script, or when the module is loaded
+# under the bare name "capture".
+if __name__ in ("__main__", "capture"):
+    test()
diff --git a/openadapt/capture/_macos.py b/openadapt/capture/_macos.py
@@ -0,0 +1,117 @@
+"""Allows for capturing the screen and audio on macOS.
+
+This is based on: https://gist.github.com/timsutton/0c6439eb6eb1621a5964
+
+usage: see bottom of file
+"""
+from datetime import datetime
+from sys import platform
+import os
+
+from Foundation import NSURL, NSObject  # type: ignore # noqa
+from Quartz import CGMainDisplayID  # type: ignore # noqa
+import AVFoundation as AVF  # type: ignore # noqa
+import objc  # type: ignore # noqa
+
+from openadapt import config
+
+
+class Capture:
+    """Capture the screen, audio, and camera on macOS.
+
+    The screen (and optionally the default audio device) is recorded by one
+    AVFoundation capture session into a timestamped ``.mov`` file inside
+    ``config.CAPTURE_DIR_PATH``. When the camera is enabled it is recorded by
+    a second session into its own ``camera.<timestamp>.mov`` file.
+    """
+
+    def __init__(self) -> None:
+        """Initialize the capture object.
+
+        Raises:
+            NotImplementedError: If the current platform is not macOS.
+        """
+        if platform != "darwin":
+            raise NotImplementedError(
+                "This is the macOS implementation, please use the Windows version"
+            )
+
+        objc.options.structs_indexable = True
+
+        # Defined up front so that stop() is safe to call before start().
+        self.session = None
+        self.camera_session = None
+
+    def start(self, audio: bool = False, camera: bool = False) -> None:
+        """Start capturing the screen, audio, and camera.
+
+        Args:
+            audio (bool, optional): Whether to capture audio (default: False).
+            camera (bool, optional): Whether to capture the camera (default: False).
+        """
+        self.display_id = CGMainDisplayID()
+        self.session = AVF.AVCaptureSession.alloc().init()
+        self.screen_input = AVF.AVCaptureScreenInput.alloc().initWithDisplayID_(
+            self.display_id
+        )
+        self.file_output = AVF.AVCaptureMovieFileOutput.alloc().init()
+        self.camera_session = None  # not used if camera=False
+        self.audio_input = None  # not used if audio=False
+
+        # makedirs also creates missing parents, and exist_ok avoids failing
+        # when the directory already exists.
+        os.makedirs(config.CAPTURE_DIR_PATH, exist_ok=True)
+        self.file_url = NSURL.fileURLWithPath_(
+            os.path.join(
+                config.CAPTURE_DIR_PATH,
+                datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + ".mov",
+            )
+        )
+
+        if audio:
+            # Only touch the audio device when audio was actually requested.
+            # initWithDevice_error_ is indexed below: element 0 is the input.
+            audio_device = AVF.AVCaptureDevice.defaultDeviceWithMediaType_(
+                AVF.AVMediaTypeAudio
+            )
+            self.audio_input = AVF.AVCaptureDeviceInput.alloc().initWithDevice_error_(
+                audio_device, None
+            )
+            if self.session.canAddInput_(self.audio_input[0]):
+                self.session.addInput_(self.audio_input[0])
+
+        if self.session.canAddInput_(self.screen_input):
+            self.session.addInput_(self.screen_input)
+
+        self.session.addOutput_(self.file_output)
+
+        self.session.startRunning()
+
+        # Cheat and pass a dummy delegate object where
+        # normally we'd have a AVCaptureFileOutputRecordingDelegate.
+        # The call's return value is deliberately not stored, so that
+        # self.file_url keeps pointing at the output file.
+        self.file_output.startRecordingToOutputFileURL_recordingDelegate_(
+            self.file_url, NSObject.alloc().init()
+        )
+
+        if camera:
+            self._use_camera()
+
+    def _use_camera(self) -> None:
+        """Start capturing the camera into its own ``.mov`` file."""
+        self.camera_session = AVF.AVCaptureSession.alloc().init()
+        self.camera_file_output = AVF.AVCaptureMovieFileOutput.alloc().init()
+        camera_device = AVF.AVCaptureDevice.defaultDeviceWithMediaType_(
+            AVF.AVMediaTypeVideo
+        )
+        self.camera_input = AVF.AVCaptureDeviceInput.alloc().initWithDevice_error_(
+            camera_device, None
+        )
+
+        if self.camera_session.canAddInput_(self.camera_input[0]):
+            self.camera_session.addInput_(self.camera_input[0])
+
+        # Attach the output before starting the session, mirroring the order
+        # used for the screen session in start().
+        self.camera_session.addOutput_(self.camera_file_output)
+        self.camera_session.startRunning()
+
+        self.camera_url = NSURL.fileURLWithPath_(
+            os.path.join(
+                config.CAPTURE_DIR_PATH,
+                datetime.now().strftime("camera.%Y-%m-%d-%H-%M-%S") + ".mov",
+            )
+        )
+        self.camera_file_output.startRecordingToOutputFileURL_recordingDelegate_(
+            self.camera_url,
+            NSObject.alloc().init(),
+        )
+
+    def stop(self) -> None:
+        """Stop capturing the screen, audio, and camera.
+
+        Does nothing for a session that was never started.
+        """
+        if self.session is not None:
+            self.session.stopRunning()
+        if self.camera_session is not None:
+            self.camera_session.stopRunning()
+
+
+# Manual check: record the screen with audio (no camera) until enter is pressed.
+if __name__ == "__main__":
+    capture = Capture()
+    capture.start(audio=True, camera=False)
+    input("Press enter to stop")
+    capture.stop()
diff --git a/openadapt/capture/_windows.py b/openadapt/capture/_windows.py
@@ -0,0 +1,103 @@
+"""Allows for capturing the screen and audio on Windows."""
+from datetime import datetime
+from sys import platform
+import os
+import wave
+
+from screen_recorder_sdk import screen_recorder
+import pyaudio
+
+from openadapt import config
+
+
+class Capture:
+    """Capture the screen video and audio on Windows.
+
+    Video is recorded through ``screen_recorder_sdk`` and audio through
+    PyAudio. They are written to separate ``.mov`` and ``.wav`` files inside
+    ``config.CAPTURE_DIR_PATH`` that share the same timestamp.
+    """
+
+    # Audio format shared by the input stream and the WAV writer.
+    AUDIO_FORMAT = pyaudio.paInt16
+    AUDIO_CHANNELS = 2
+    AUDIO_RATE = 44100
+    AUDIO_FRAMES_PER_BUFFER = 1024
+
+    def __init__(self, pid: int = 0) -> None:
+        """Initialize the capture object.
+
+        Args:
+            pid (int, optional): The process ID of the window to capture.
+            Defaults to 0 (the entire screen)
+
+        Raises:
+            NotImplementedError: If the current platform is not Windows.
+        """
+        if platform != "win32":
+            raise NotImplementedError(
+                "This is the Windows implementation, please use the macOS version"
+            )
+        self.is_recording = False
+        self.video_out = None
+        self.audio_out = None
+        self.pid = pid
+
+        self._init_resources()
+        self.audio_stream = None
+        self.audio_frames = []
+
+    def _init_resources(self) -> None:
+        """Acquire the screen recorder resources and a PyAudio instance."""
+        screen_recorder.init_resources(screen_recorder.RecorderParams(pid=self.pid))
+        self.audio = pyaudio.PyAudio()
+        self._resources_ready = True
+
+    def start(self, audio: bool = True, camera: bool = False) -> None:
+        """Start capturing the screen video and audio.
+
+        Args:
+            audio (bool): Whether to capture audio.
+            camera (bool, optional): Accepted so this backend can be called
+                with the same keywords as the macOS one; camera capture is
+                not implemented here and the flag is ignored.
+
+        Raises:
+            RuntimeError: If a recording is already in progress.
+        """
+        if self.is_recording:
+            raise RuntimeError("Recording is already in progress")
+        if not self._resources_ready:
+            # A previous stop() released the recorder and PyAudio; acquire
+            # them again so the same object can record more than once.
+            self._init_resources()
+
+        os.makedirs(config.CAPTURE_DIR_PATH, exist_ok=True)
+        # One timestamp for both files so the video and audio names match.
+        timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+
+        # Start video recording
+        self.video_out = os.path.join(config.CAPTURE_DIR_PATH, timestamp + ".mov")
+        screen_recorder.start_video_recording(self.video_out, 30, 8000000, True)
+        # Only mark as recording once the recorder has actually started.
+        self.is_recording = True
+
+        # Start audio recording
+        if audio:
+            self.audio_out = os.path.join(config.CAPTURE_DIR_PATH, timestamp + ".wav")
+            # Reset the buffer before opening the stream: the callback can
+            # start appending frames as soon as the stream is open.
+            self.audio_frames = []
+            self.audio_stream = self.audio.open(
+                format=self.AUDIO_FORMAT,
+                channels=self.AUDIO_CHANNELS,
+                rate=self.AUDIO_RATE,
+                input=True,
+                frames_per_buffer=self.AUDIO_FRAMES_PER_BUFFER,
+                stream_callback=self._audio_callback,
+            )
+
+    def _audio_callback(
+        self, in_data: bytes, frame_count: int, time_info: dict, status: int
+    ) -> tuple:
+        """Buffer one chunk of recorded audio and keep the stream running.
+
+        Returns:
+            tuple: ``(None, pyaudio.paContinue)``.
+        """
+        self.audio_frames.append(in_data)
+        return (None, pyaudio.paContinue)
+
+    def stop(self) -> None:
+        """Stop capturing the screen video and audio.
+
+        Does nothing when no recording is in progress. Otherwise the video
+        recording is stopped, any captured audio is written to disk, and the
+        recorder and PyAudio resources are released.
+        """
+        if not self.is_recording:
+            return
+        try:
+            screen_recorder.stop_video_recording()
+            if self.audio_stream:
+                self.audio_stream.stop_stream()
+                self.audio_stream.close()
+                self.audio_stream = None
+                # Save before terminating PyAudio, which save_audio() queries.
+                self.save_audio()
+        finally:
+            # Reset state and release resources even if stopping/saving failed.
+            self.is_recording = False
+            self._resources_ready = False
+            self.audio.terminate()
+            screen_recorder.free_resources()
+
+    def save_audio(self) -> None:
+        """Save the captured audio to a WAV file."""
+        with wave.open(self.audio_out, "wb") as wf:
+            wf.setnchannels(self.AUDIO_CHANNELS)
+            wf.setsampwidth(self.audio.get_sample_size(self.AUDIO_FORMAT))
+            wf.setframerate(self.AUDIO_RATE)
+            wf.writeframes(b"".join(self.audio_frames))
+
+
+# Manual check: record with the default start() options until enter is pressed.
+if __name__ == "__main__":
+    capture = Capture()
+    capture.start()
+    input("Press enter to stop")
+    capture.stop()
diff --git a/openadapt/config.py b/openadapt/config.py
@@ -46,6 +46,10 @@
     "ACTION_TEXT_SEP": "-",
     "ACTION_TEXT_NAME_PREFIX": "<",
     "ACTION_TEXT_NAME_SUFFIX": ">",
+    # PERFORMANCE PLOTTING CONFIGURATION
+    "PLOT_PERFORMANCE": True,
+    # CAPTURE CONFIGURATION
+    "CAPTURE_DIR_PATH": "captures",
     # APP CONFIGURATIONS
     "APP_DARK_MODE": False,
     # SCRUBBING CONFIGURATIONS
diff --git a/openadapt/replay.py b/openadapt/replay.py
@@ -10,13 +10,14 @@
 --timestamp=<timestamp> Timestamp of the recording to replay.
 
 """
-
+from time import sleep
 from typing import Union
+import os
 
 from loguru import logger
 import fire
 
-from openadapt import crud, utils
+from openadapt import capture, crud, utils
 from openadapt.models import Recording
 
 LOG_LEVEL = "INFO"
@@ -25,6 +26,7 @@
 @logger.catch
 def replay(
     strategy_name: str,
+    record: bool = False,
     timestamp: Union[str, None] = None,
     recording: Recording = None,
 ) -> bool:
@@ -34,6 +36,7 @@ def replay(
         strategy_name (str): Name of the replay strategy to use.
         timestamp (str, optional): Timestamp of the recording to replay.
         recording (Recording, optional): Recording to replay.
+        record (bool, optional): Flag indicating whether to record the replay.
 
     Returns:
         bool: True if replay was successful, None otherwise.
@@ -66,8 +69,30 @@ def replay(
     strategy = strategy_class(recording)
     logger.info(f"{strategy=}")
 
-    strategy.run()
-    return True
+    handler = None
+    rval = True
+    if record:
+        capture.start(audio=False, camera=False)
+        # TODO: handle this more robustly
+        sleep(1)
+        file_name = f"log-{strategy_name}-{recording.timestamp}.log"
+        # TODO: make configurable
+        dir_name = "captures"
+        file_path = os.path.join(dir_name, file_name)
+        logger.info(f"{file_path=}")
+        handler = logger.add(open(file_path, "w"))
+    try:
+        strategy.run()
+    except Exception as e:
+        logger.exception(e)
+        rval = False
+
+    if record:
+        sleep(1)
+        capture.stop()
+        logger.remove(handler)
+
+    return rval
 
 
 # Entry point
diff --git a/openadapt/window/_macos.py b/openadapt/window/_macos.py
@@ -96,7 +96,6 @@ def get_active_window(window_meta: dict) -> ApplicationServices.AXUIElementRef |
         return None
     return window
 
-
 def get_window_data(window_meta: dict) -> dict:
     """Get the data of the window.
 
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-[Join us on Slack](https://join.slack.com/t/mldsai/shared_invite/zt-1uf94nn7r-qcQnS~hinLPKftUapNzbuw)`
	`1`	`+[Join us on Slack](https://join.slack.com/t/mldsai/shared_invite/zt-1uf94nn7r-qcQnS~hinLPKftUapNzbuw)`
`2`	`2`
`3`	`3`	`# OpenAdapt: AI-First Process Automation with Transformers`
`4`	`4`