Skip to content

Commit 581b9b8

Browse files
0dmabrichr
andauthored
feat: add capture.py - also fixes audio recording (#362)
* merge * Create capture.py * Update capture.py * Update capture.py * it's finally fixed * add dependencies * comment * move code + use config.CAPTURE_DIR_PATH * remove debug lines * Update capture.py * OpenAdaptCapture -> Capture * add camera * Let's have this off by default. * hotfix * fix * linting * Create capture.py * windows * cleanup + lint * Update _windows.py * add audio + new windows recording * screen_recorder.free_resources() * Update _windows.py * isort * add playback recording * Update replay.py * Update replay.py * Update README.md * Revert "Update README.md" This reverts commit 7064103. * Update README.md * Revert "Revert "Update README.md"" This reverts commit 0fe8156. * Update README.md * run pre-commit * Update pyproject.toml * Update openadapt/replay.py * Update openadapt/replay.py * Update openadapt/replay.py * Update replay.py * update poetry.lock --------- Co-authored-by: Richard Abrich <[email protected]> Co-authored-by: Richard Abrich <[email protected]>
1 parent b449202 commit 581b9b8

File tree

9 files changed

+342
-9
lines changed

9 files changed

+342
-9
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
[Join us on Slack](https://join.slack.com/t/mldsai/shared_invite/zt-1uf94nn7r-qcQnS~hinLPKftUapNzbuw)
1+
[Join us on Slack](https://join.slack.com/t/mldsai/shared_invite/zt-1uf94nn7r-qcQnS~hinLPKftUapNzbuw)
22

33
# OpenAdapt: AI-First Process Automation with Transformers
44

openadapt/capture/__init__.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""Capture the screen, audio, and camera as a video on macOS and Windows.
2+
3+
Module: capture.py
4+
"""
5+
import sys
6+
7+
# Select the platform-specific backend; each backend module defines a
# ``Capture`` class.
if sys.platform == "darwin":
    from . import _macos as impl
elif sys.platform == "win32":
    from . import _windows as impl
else:
    # NotImplementedError (an Exception subclass) matches what the backends
    # themselves raise when used on the wrong platform.
    raise NotImplementedError(f"Unsupported platform: {sys.platform}")

# Module-level capture object shared by the functions in this module.
# NOTE: it is constructed at import time.
device = impl.Capture()
15+
16+
17+
def get_capture() -> impl.Capture:
    """Return the module-level capture object.

    Returns:
        Capture: The platform-specific capture object held in ``device``.
    """
    capture_device = device
    return capture_device
24+
25+
26+
def start(audio: bool = False, camera: bool = False) -> None:
    """Begin capturing with the module-level capture object.

    Args:
        audio (bool, optional): Forwarded to the device's ``start`` as
            ``audio`` (default: False).
        camera (bool, optional): Forwarded to the device's ``start`` as
            ``camera`` (default: False).
    """
    options = {"audio": audio, "camera": camera}
    device.start(**options)
29+
30+
31+
def stop() -> None:
    """Halt the capture running on the module-level capture object."""
    capture_device = device
    capture_device.stop()
34+
35+
36+
def test() -> None:
    """Manually exercise the capture: record until the user presses enter.

    The device is started with its default arguments. It is stopped in a
    ``finally`` clause so that an interrupted prompt (e.g. Ctrl-C or
    end-of-input) still stops the capture instead of leaving it running.
    """
    device.start()
    try:
        input("Press enter to stop")
    finally:
        device.stop()
41+
42+
43+
# Run the manual test when this module is executed as a script or is loaded
# under the module name "capture".
if __name__ == "__main__" or __name__ == "capture":
    test()

openadapt/capture/_macos.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
"""Allows for capturing the screen and audio on macOS.
2+
3+
This is based on: https://gist.github.com/timsutton/0c6439eb6eb1621a5964
4+
5+
usage: see bottom of file
6+
"""
7+
from datetime import datetime
8+
from sys import platform
9+
import os
10+
11+
from Foundation import NSURL, NSObject # type: ignore # noqa
12+
from Quartz import CGMainDisplayID # type: ignore # noqa
13+
import AVFoundation as AVF # type: ignore # noqa
14+
import objc # type: ignore # noqa
15+
16+
from openadapt import config
17+
18+
19+
class Capture:
    """Capture the screen, audio, and camera on macOS.

    The screen (and, when requested, the default audio device) is recorded
    to a timestamped ``.mov`` file inside ``config.CAPTURE_DIR_PATH``. When
    the camera is requested it is recorded by a second capture session to
    its own ``camera.<timestamp>.mov`` file in the same directory.
    """

    def __init__(self) -> None:
        """Initialize the capture object.

        Raises:
            NotImplementedError: If the current platform is not macOS.
        """
        if platform != "darwin":
            raise NotImplementedError(
                "This is the macOS implementation, please use the Windows version"
            )

        objc.options.structs_indexable = True

        # Defined up front so that stop() is safe to call before start().
        self.session = None
        self.camera_session = None

    def start(self, audio: bool = False, camera: bool = False) -> None:
        """Start capturing the screen, audio, and camera.

        Args:
            audio (bool, optional): Whether to capture audio (default: False).
            camera (bool, optional): Whether to capture the camera (default: False).
        """
        self.display_id = CGMainDisplayID()
        self.session = AVF.AVCaptureSession.alloc().init()
        self.screen_input = AVF.AVCaptureScreenInput.alloc().initWithDisplayID_(
            self.display_id
        )
        self.file_output = AVF.AVCaptureMovieFileOutput.alloc().init()
        self.camera_session = None  # not used if camera=False
        self.audio_input = None  # stays None unless audio is requested

        # exist_ok avoids the check-then-create race and also creates any
        # missing parent directories.
        os.makedirs(config.CAPTURE_DIR_PATH, exist_ok=True)
        self.file_url = NSURL.fileURLWithPath_(
            os.path.join(
                config.CAPTURE_DIR_PATH,
                datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + ".mov",
            )
        )

        if audio:
            # Only build the audio input when audio was requested, and skip
            # it when there is no default audio device to record from.
            audio_device = AVF.AVCaptureDevice.defaultDeviceWithMediaType_(
                AVF.AVMediaTypeAudio
            )
            if audio_device is not None:
                # The ``error:`` selector yields a sequence whose first item
                # is the device input (hence the [0] below).
                self.audio_input = (
                    AVF.AVCaptureDeviceInput.alloc().initWithDevice_error_(
                        audio_device, None
                    )
                )
                audio_in = self.audio_input[0]
                if audio_in is not None and self.session.canAddInput_(audio_in):
                    self.session.addInput_(audio_in)

        if self.session.canAddInput_(self.screen_input):
            self.session.addInput_(self.screen_input)

        self.session.addOutput_(self.file_output)

        self.session.startRunning()

        # Cheat and pass a dummy delegate object where
        # normally we'd have a AVCaptureFileOutputRecordingDelegate.
        # The call's return value is deliberately not stored: assigning it
        # back to self.file_url would replace the output URL.
        self.file_output.startRecordingToOutputFileURL_recordingDelegate_(
            self.file_url, NSObject.alloc().init()
        )

        if camera:
            self._use_camera()

    def _use_camera(self) -> None:
        """Start capturing the camera in a separate capture session.

        Does nothing (leaving ``camera_session`` as ``None``) when there is
        no default video device.
        """
        camera_device = AVF.AVCaptureDevice.defaultDeviceWithMediaType_(
            AVF.AVMediaTypeVideo
        )
        if camera_device is None:
            return

        self.camera_session = AVF.AVCaptureSession.alloc().init()
        self.camera_file_output = AVF.AVCaptureMovieFileOutput.alloc().init()
        self.camera_input = AVF.AVCaptureDeviceInput.alloc().initWithDevice_error_(
            camera_device, None
        )

        camera_in = self.camera_input[0]
        if camera_in is not None and self.camera_session.canAddInput_(camera_in):
            self.camera_session.addInput_(camera_in)
            self.camera_session.startRunning()

        self.camera_session.addOutput_(self.camera_file_output)

        # Keep the actual output URL rather than the recording call's result.
        self.camera_url = NSURL.fileURLWithPath_(
            os.path.join(
                config.CAPTURE_DIR_PATH,
                datetime.now().strftime("camera.%Y-%m-%d-%H-%M-%S") + ".mov",
            )
        )
        self.camera_file_output.startRecordingToOutputFileURL_recordingDelegate_(
            self.camera_url,
            NSObject.alloc().init(),
        )

    def stop(self) -> None:
        """Stop capturing the screen, audio, and camera.

        Safe to call when no capture has been started.
        """
        if self.session is not None:
            self.session.stopRunning()
        if self.camera_session is not None:
            self.camera_session.stopRunning()
111+
112+
113+
# Manual usage example: record the screen with audio until enter is pressed.
if __name__ == "__main__":
    capture = Capture()
    capture.start(audio=True, camera=False)
    try:
        input("Press enter to stop")
    finally:
        # Stop even if the prompt is interrupted (Ctrl-C / end-of-input).
        capture.stop()

openadapt/capture/_windows.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
"""Allows for capturing the screen and audio on Windows."""
2+
from datetime import datetime
3+
from sys import platform
4+
import os
5+
import wave
6+
7+
from screen_recorder_sdk import screen_recorder
8+
import pyaudio
9+
10+
from openadapt import config
11+
12+
13+
class Capture:
    """Capture the screen video and audio on Windows.

    Video is recorded through ``screen_recorder``; audio is collected
    separately through a PyAudio callback stream and written to a WAV file
    when the capture is stopped. Both files are placed in
    ``config.CAPTURE_DIR_PATH``.
    """

    # Recording parameters shared by start() and save_audio().
    VIDEO_FRAME_RATE = 30
    VIDEO_BIT_RATE = 8000000
    AUDIO_CHANNELS = 2
    AUDIO_RATE = 44100
    AUDIO_FRAMES_PER_BUFFER = 1024

    def __init__(self, pid: int = 0) -> None:
        """Initialize the capture object.

        Args:
            pid (int, optional): The process ID of the window to capture.
                Defaults to 0 (the entire screen)

        Raises:
            NotImplementedError: If the current platform is not Windows.
        """
        if platform != "win32":
            raise NotImplementedError(
                "This is the Windows implementation, please use the macOS version"
            )
        self.is_recording = False
        self.video_out = None
        self.audio_out = None
        self.pid = pid

        self.audio = None
        self.audio_stream = None
        self.audio_frames = []

        self._resources_ready = False
        self._acquire_resources()

    def _acquire_resources(self) -> None:
        """Initialize the screen recorder and PyAudio unless already done."""
        if self._resources_ready:
            return
        screen_recorder.init_resources(screen_recorder.RecorderParams(pid=self.pid))
        self.audio = pyaudio.PyAudio()
        self._resources_ready = True

    def _release_resources(self) -> None:
        """Terminate PyAudio and free the screen recorder if they are held."""
        if not self._resources_ready:
            return
        self._resources_ready = False
        if self.audio is not None:
            self.audio.terminate()
        screen_recorder.free_resources()

    def start(self, audio: bool = True, camera: bool = False) -> None:
        """Start capturing the screen video and audio.

        Args:
            audio (bool): Whether to capture audio.
            camera (bool): Accepted so this method can be called with the
                same keywords as the macOS implementation; this class has
                no camera capture, so the value is ignored.

        Raises:
            RuntimeError: If a recording is already in progress.
        """
        if self.is_recording:
            raise RuntimeError("Recording is already in progress")

        # stop() releases the recorder and PyAudio; get them back so the
        # same object can record more than once.
        self._acquire_resources()

        os.makedirs(config.CAPTURE_DIR_PATH, exist_ok=True)
        # One timestamp so the video and audio files share a base name.
        timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

        # Start video recording
        self.video_out = os.path.join(config.CAPTURE_DIR_PATH, timestamp + ".mov")
        screen_recorder.start_video_recording(
            self.video_out, self.VIDEO_FRAME_RATE, self.VIDEO_BIT_RATE, True
        )
        # Only mark as recording once the recorder has actually started.
        self.is_recording = True

        # Start audio recording
        self.audio_out = None
        if audio:
            self.audio_out = os.path.join(
                config.CAPTURE_DIR_PATH, timestamp + ".wav"
            )
            # Reset before opening the stream so frames delivered by early
            # callbacks are not thrown away.
            self.audio_frames = []
            self.audio_stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=self.AUDIO_CHANNELS,
                rate=self.AUDIO_RATE,
                input=True,
                frames_per_buffer=self.AUDIO_FRAMES_PER_BUFFER,
                stream_callback=self._audio_callback,
            )

    def _audio_callback(
        self, in_data: bytes, frame_count: int, time_info: dict, status: int
    ) -> tuple:
        """Collect one buffer of recorded audio and keep the stream running."""
        self.audio_frames.append(in_data)
        return (None, pyaudio.paContinue)

    def stop(self) -> None:
        """Stop capturing the screen video and audio.

        Does nothing when no recording is in progress. The captured audio
        (if any) is saved before PyAudio is terminated.
        """
        if not self.is_recording:
            return
        try:
            screen_recorder.stop_video_recording()
            if self.audio_stream:
                self.audio_stream.stop_stream()
                self.audio_stream.close()
                # Drop the closed stream so a later stop() does not reuse it.
                self.audio_stream = None
                self.save_audio()
        finally:
            self.is_recording = False
            self._release_resources()

    def save_audio(self) -> None:
        """Save the captured audio to a WAV file at ``self.audio_out``."""
        with wave.open(self.audio_out, "wb") as wf:
            wf.setnchannels(self.AUDIO_CHANNELS)
            wf.setsampwidth(self.audio.get_sample_size(pyaudio.paInt16))
            wf.setframerate(self.AUDIO_RATE)
            wf.writeframes(b"".join(self.audio_frames))
97+
98+
99+
# Manual usage example: record with the default settings until enter is pressed.
if __name__ == "__main__":
    capture = Capture()
    capture.start()
    try:
        input("Press enter to stop")
    finally:
        # Stop even if the prompt is interrupted (Ctrl-C / end-of-input).
        capture.stop()

openadapt/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@
4646
"ACTION_TEXT_SEP": "-",
4747
"ACTION_TEXT_NAME_PREFIX": "<",
4848
"ACTION_TEXT_NAME_SUFFIX": ">",
49+
# PERFORMANCE PLOTTING CONFIGURATION
50+
"PLOT_PERFORMANCE": True,
51+
# CAPTURE CONFIGURATION
52+
"CAPTURE_DIR_PATH": "captures",
4953
# APP CONFIGURATIONS
5054
"APP_DARK_MODE": False,
5155
# SCRUBBING CONFIGURATIONS

openadapt/replay.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,14 @@
1010
--timestamp=<timestamp> Timestamp of the recording to replay.
1111
1212
"""
13-
13+
from time import sleep
1414
from typing import Union
15+
import os
1516

1617
from loguru import logger
1718
import fire
1819

19-
from openadapt import crud, utils
20+
from openadapt import capture, crud, utils
2021
from openadapt.models import Recording
2122

2223
LOG_LEVEL = "INFO"
@@ -25,6 +26,7 @@
2526
@logger.catch
2627
def replay(
2728
strategy_name: str,
29+
record: bool = False,
2830
timestamp: Union[str, None] = None,
2931
recording: Recording = None,
3032
) -> bool:
@@ -34,6 +36,7 @@ def replay(
3436
strategy_name (str): Name of the replay strategy to use.
3537
timestamp (str, optional): Timestamp of the recording to replay.
3638
recording (Recording, optional): Recording to replay.
39+
record (bool, optional): Flag indicating whether to record the replay.
3740
3841
Returns:
3942
bool: True if replay was successful, None otherwise.
@@ -66,8 +69,30 @@ def replay(
6669
strategy = strategy_class(recording)
6770
logger.info(f"{strategy=}")
6871

69-
strategy.run()
70-
return True
72+
handler = None
73+
rval = True
74+
if record:
75+
capture.start(audio=False, camera=False)
76+
# TODO: handle this more robustly
77+
sleep(1)
78+
file_name = f"log-{strategy_name}-{recording.timestamp}.log"
79+
# TODO: make configurable
80+
dir_name = "captures"
81+
file_path = os.path.join(dir_name, file_name)
82+
logger.info(f"{file_path=}")
83+
handler = logger.add(open(file_path, "w"))
84+
try:
85+
strategy.run()
86+
except Exception as e:
87+
logger.exception(e)
88+
rval = False
89+
90+
if record:
91+
sleep(1)
92+
capture.stop()
93+
logger.remove(handler)
94+
95+
return rval
7196

7297

7398
# Entry point

openadapt/window/_macos.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ def get_active_window(window_meta: dict) -> ApplicationServices.AXUIElementRef |
9696
return None
9797
return window
9898

99-
10099
def get_window_data(window_meta: dict) -> dict:
101100
"""Get the data of the window.
102101

0 commit comments

Comments
 (0)