Skip to content

Commit b067f48

Browse files
author
Daniel Flores
committed
language, print frame mappings struct
1 parent 7b9da78 commit b067f48

File tree

1 file changed

+31
-34
lines changed

1 file changed

+31
-34
lines changed

examples/decoding/custom_frame_mappings.py

Lines changed: 31 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@
2525
# First, some boilerplate: we'll download a short video from the web, and
2626
# use ffmpeg to create a longer version by repeating it multiple times. We'll end up
2727
# with two videos: a short one of approximately 3 minutes and a long one of about 13 minutes.
28-
# You can ignore that part and jump right below to :ref:`frame_mappings_creation`.
28+
# You can ignore this part and skip below to :ref:`frame_mappings_creation`.
2929

3030
import tempfile
3131
from pathlib import Path
3232
import subprocess
3333
import requests
3434

35-
url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4.mp4"
35+
url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4"
3636
response = requests.get(url, headers={"User-Agent": ""})
3737
if response.status_code != 200:
3838
raise RuntimeError(f"Failed to download video. {response.status_code = }.")
@@ -63,45 +63,48 @@
6363
# Creating custom frame mappings with ffprobe
6464
# -------------------------------------------
6565
#
66-
# The key to using custom frame mappings is preprocessing your videos to extract
67-
# frame timing information and keyframe indicators. We use ffprobe to generate
68-
# JSON files containing this metadata.
66+
# To generate JSON files containing the required video metadata, we recommend using ffprobe.
67+
# The following frame metadata fields are needed
68+
# (the ``pkt_`` prefix is needed for older versions of FFmpeg):
69+
#
70+
# - ``pts`` / ``pkt_pts``: Presentation timestamps for each frame
71+
# - ``duration`` / ``pkt_duration``: Duration of each frame
72+
# - ``key_frame``: Boolean indicating whether each frame is a key frame
6973

7074
from pathlib import Path
7175
import subprocess
7276
import tempfile
7377
from time import perf_counter_ns
78+
import json
7479

7580
stream_index = 0
76-
7781
long_json_path = Path(temp_dir) / "long_custom_frame_mappings.json"
7882
short_json_path = Path(temp_dir) / "short_custom_frame_mappings.json"
7983

8084
ffprobe_cmd = ["ffprobe", "-i", f"{long_video_path}", "-select_streams", f"{stream_index}", "-show_frames", "-show_entries", "frame=pkt_pts,pkt_duration,key_frame", "-of", "json"]
8185
ffprobe_result = subprocess.run(ffprobe_cmd, check=True, capture_output=True, text=True)
8286
with open(long_json_path, "w") as f:
8387
f.write(ffprobe_result.stdout)
84-
print(f"Wrote {len(ffprobe_result.stdout)} characters to {long_json_path}")
8588

8689
ffprobe_cmd = ["ffprobe", "-i", f"{short_video_path}", "-select_streams", f"{stream_index}", "-show_frames", "-show_entries", "frame=pkt_pts,pkt_duration,key_frame", "-of", "json"]
8790
ffprobe_result = subprocess.run(ffprobe_cmd, check=True, capture_output=True, text=True)
8891
with open(short_json_path, "w") as f:
8992
f.write(ffprobe_result.stdout)
90-
print(f"Wrote {len(ffprobe_result.stdout)} characters to {short_json_path}")
93+
94+
sample_data = json.loads(ffprobe_result.stdout)
95+
print("Data structure of custom frame mappings:")
96+
for frame in sample_data["frames"][:3]:
97+
print(f"{frame}")
9198

9299
# %%
93-
# .. _perf_creation:
100+
# .. _custom_frame_mappings_perf_creation:
94101
#
95102
# Performance: ``VideoDecoder`` creation
96103
# --------------------------------------
97104
#
98-
# In terms of performance, custom frame mappings ultimately affect the
99-
# **creation** of a :class:`~torchcodec.decoders.VideoDecoder` object. The
100-
# longer the video, the higher the performance gain.
101-
# Let's define a benchmarking function to measure performance.
102-
# Note that when using file-like objects for custom_frame_mappings, we need to
103-
# seek back to the beginning between iterations since the JSON data is consumed
104-
# during VideoDecoder creation.
105+
# Custom frame mappings affect the **creation** of a :class:`~torchcodec.decoders.VideoDecoder`
106+
# object. As video length increases, the performance gain compared to exact mode increases.
107+
#
105108

106109
import torch
107110

@@ -126,31 +129,26 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
126129
med = times.median().item()
127130
print(f"{med = :.2f}ms +- {std:.2f}")
128131

129-
# %%
130-
# Now let's compare the performance of creating VideoDecoder objects with custom
131-
# frame mappings versus the exact seek mode. You'll see that custom
132-
# frame mappings provide significant speedups, especially for longer videos.
133-
134132

135133
for video_path, json_path in ((short_video_path, short_json_path), (long_video_path, long_json_path)):
136-
print(f"Running benchmarks on {Path(video_path).name}")
134+
print(f"\nRunning benchmarks on {Path(video_path).name}")
137135

138136
print("Creating a VideoDecoder object with custom_frame_mappings:")
139137
with open(json_path, "r") as f:
140138
bench(VideoDecoder, file_like=True, source=video_path, stream_index=stream_index, custom_frame_mappings=f)
141139

142-
# Compare against seek_modes
140+
# Compare against exact seek_mode
143141
print("Creating a VideoDecoder object with seek_mode='exact':")
144142
bench(VideoDecoder, source=video_path, stream_index=stream_index, seek_mode="exact")
145143

146144
# %%
147145
# Performance: Frame decoding with custom frame mappings
148146
# ------------------------------------------------------
149147
#
150-
# Although the custom_frame_mappings parameter only affects the performance of
151-
# the :class:`~torchcodec.decoders.VideoDecoder` creation, decoding workflows
152-
# typically involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance.
153-
# As a result, the performance benefits of custom_frame_mappings can be seen.
148+
# Although using custom_frame_mappings only impacts the initialization speed of
149+
# :class:`~torchcodec.decoders.VideoDecoder`, decoding workflows
150+
# usually involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance,
151+
# so the faster initialization also benefits the overall decoding workflow.
154152

155153

156154
def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None):
@@ -163,8 +161,8 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
163161

164162

165163
for video_path, json_path in ((short_video_path, short_json_path), (long_video_path, long_json_path)):
166-
print(f"Running benchmarks on {Path(video_path).name}")
167-
print("Decoding frames with custom_frame_mappings JSON str from file:")
164+
print(f"\nRunning benchmarks on {Path(video_path).name}")
165+
print("Decoding frames with custom_frame_mappings:")
168166
with open(json_path, "r") as f:
169167
bench(decode_frames, file_like=True, video_path=video_path, custom_frame_mappings=f)
170168

@@ -175,9 +173,9 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
175173
# Accuracy: Metadata and frame retrieval
176174
# --------------------------------------
177175
#
178-
# We've seen that using custom frame mappings can significantly speed up
179-
# the :class:`~torchcodec.decoders.VideoDecoder` creation. The advantage is that
180-
# seeking is still as accurate as with ``seek_mode="exact"``.
176+
# In addition to the instantiation speedup compared to ``seek_mode="exact"``, using custom frame mappings
177+
# also retains the benefit of exact metadata and frame seeking.
178+
#
181179

182180
print("Metadata of short video with custom_frame_mappings:")
183181
with open(short_json_path, "r") as f:
@@ -202,9 +200,8 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
202200
#
203201
# Custom frame mappings contain the same frame index information
204202
# that would normally be computed during the :term:`scan` operation in exact mode.
205-
# (frame presentation timestamps (PTS), durations, and keyframe indicators)
206203
# By providing this information to the :class:`~torchcodec.decoders.VideoDecoder`
207-
# as a JSON, it eliminates the need for the expensive scan while preserving all the
204+
# as JSON, we eliminate the need for the expensive scan while preserving the
208205
# accuracy benefits.
209206
#
210207
# Which mode should I use?

0 commit comments

Comments
 (0)