25
25
# First, some boilerplate: we'll download a short video from the web, and
26
26
# use ffmpeg to create a longer version by repeating it multiple times. We'll end up
27
27
# with two videos: a short one of approximately 3 minutes and a long one of about 13 minutes.
28
- # You can ignore that part and jump right below to :ref:`frame_mappings_creation`.
28
+ # You can ignore this part and skip below to :ref:`frame_mappings_creation`.
29
29
30
30
import tempfile
31
31
from pathlib import Path
32
32
import subprocess
33
33
import requests
34
34
35
- url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4 .mp4"
35
+ url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small .mp4"
36
36
response = requests .get (url , headers = {"User-Agent" : "" })
37
37
if response .status_code != 200 :
38
38
raise RuntimeError (f"Failed to download video. { response .status_code = } ." )
63
63
# Creating custom frame mappings with ffprobe
64
64
# -------------------------------------------
65
65
#
66
- # The key to using custom frame mappings is preprocessing your videos to extract
67
- # frame timing information and keyframe indicators. We use ffprobe to generate
68
- # JSON files containing this metadata.
66
+ # To generate JSON files containing the required video metadata, we recommend using ffprobe.
67
+ # The following metadata fields are required for each frame
68
+ # (older versions of FFmpeg report them under the ``pkt_``-prefixed names):
69
+ #
70
+ # - ``pts`` / ``pkt_pts``: Presentation timestamps for each frame
71
+ # - ``duration`` / ``pkt_duration``: Duration of each frame
72
+ # - ``key_frame``: Flag indicating whether the frame is a key frame
69
73
70
74
from pathlib import Path
71
75
import subprocess
72
76
import tempfile
73
77
from time import perf_counter_ns
78
+ import json
74
79
75
80
stream_index = 0
76
-
77
81
long_json_path = Path (temp_dir ) / "long_custom_frame_mappings.json"
78
82
short_json_path = Path (temp_dir ) / "short_custom_frame_mappings.json"
79
83
80
84
ffprobe_cmd = ["ffprobe" , "-i" , f"{ long_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pkt_pts,pkt_duration,key_frame" , "-of" , "json" ]
81
85
ffprobe_result = subprocess .run (ffprobe_cmd , check = True , capture_output = True , text = True )
82
86
with open (long_json_path , "w" ) as f :
83
87
f .write (ffprobe_result .stdout )
84
- print (f"Wrote { len (ffprobe_result .stdout )} characters to { long_json_path } " )
85
88
86
89
ffprobe_cmd = ["ffprobe" , "-i" , f"{ short_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pkt_pts,pkt_duration,key_frame" , "-of" , "json" ]
87
90
ffprobe_result = subprocess .run (ffprobe_cmd , check = True , capture_output = True , text = True )
88
91
with open (short_json_path , "w" ) as f :
89
92
f .write (ffprobe_result .stdout )
90
- print (f"Wrote { len (ffprobe_result .stdout )} characters to { short_json_path } " )
93
+
94
+ sample_data = json .loads (ffprobe_result .stdout )
95
+ print ("Data structure of custom frame mappings:" )
96
+ for frame in sample_data ["frames" ][:3 ]:
97
+ print (f"{ frame } " )
91
98
92
99
# %%
93
- # .. _perf_creation :
100
+ # .. _custom_frame_mappings_perf_creation:
94
101
#
95
102
# Performance: ``VideoDecoder`` creation
96
103
# --------------------------------------
97
104
#
98
- # In terms of performance, custom frame mappings ultimately affect the
99
- # **creation** of a :class:`~torchcodec.decoders.VideoDecoder` object. The
100
- # longer the video, the higher the performance gain.
101
- # Let's define a benchmarking function to measure performance.
102
- # Note that when using file-like objects for custom_frame_mappings, we need to
103
- # seek back to the beginning between iterations since the JSON data is consumed
104
- # during VideoDecoder creation.
105
+ # Custom frame mappings affect the **creation** of a :class:`~torchcodec.decoders.VideoDecoder`
106
+ # object. As video length increases, the performance gain compared to exact mode increases.
107
+ #
105
108
106
109
import torch
107
110
@@ -126,31 +129,26 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
126
129
med = times .median ().item ()
127
130
print (f"{ med = :.2f} ms +- { std :.2f} " )
128
131
129
- # %%
130
- # Now let's compare the performance of creating VideoDecoder objects with custom
131
- # frame mappings versus the exact seek mode. You'll see that custom
132
- # frame mappings provide significant speedups, especially for longer videos.
133
-
134
132
135
133
for video_path , json_path in ((short_video_path , short_json_path ), (long_video_path , long_json_path )):
136
- print (f"Running benchmarks on { Path (video_path ).name } " )
134
+ print (f"\n Running benchmarks on { Path (video_path ).name } " )
137
135
138
136
print ("Creating a VideoDecoder object with custom_frame_mappings:" )
139
137
with open (json_path , "r" ) as f :
140
138
bench (VideoDecoder , file_like = True , source = video_path , stream_index = stream_index , custom_frame_mappings = f )
141
139
142
- # Compare against seek_modes
140
+ # Compare against exact seek_mode
143
141
print ("Creating a VideoDecoder object with seek_mode='exact':" )
144
142
bench (VideoDecoder , source = video_path , stream_index = stream_index , seek_mode = "exact" )
145
143
146
144
# %%
147
145
# Performance: Frame decoding with custom frame mappings
148
146
# ------------------------------------------------------
149
147
#
150
- # Although the custom_frame_mappings parameter only affects the performance of
151
- # the :class:`~torchcodec.decoders.VideoDecoder` creation , decoding workflows
152
- # typically involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance.
153
- # As a result, the performance benefits of custom_frame_mappings can be seen .
148
+ # Although using custom_frame_mappings only impacts the initialization speed of
149
+ # :class:`~torchcodec.decoders.VideoDecoder`, decoding workflows
150
+ # usually involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance,
151
+ # so the faster initialization also shortens the overall decoding time.
154
152
155
153
156
154
def decode_frames (video_path , seek_mode = "exact" , custom_frame_mappings = None ):
@@ -163,8 +161,8 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
163
161
164
162
165
163
for video_path , json_path in ((short_video_path , short_json_path ), (long_video_path , long_json_path )):
166
- print (f"Running benchmarks on { Path (video_path ).name } " )
167
- print ("Decoding frames with custom_frame_mappings JSON str from file :" )
164
+ print (f"\n Running benchmarks on { Path (video_path ).name } " )
165
+ print ("Decoding frames with custom_frame_mappings:" )
168
166
with open (json_path , "r" ) as f :
169
167
bench (decode_frames , file_like = True , video_path = video_path , custom_frame_mappings = f )
170
168
@@ -175,9 +173,9 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
175
173
# Accuracy: Metadata and frame retrieval
176
174
# --------------------------------------
177
175
#
178
- # We've seen that using custom frame mappings can significantly speed up
179
- # the :class:`~torchcodec.decoders.VideoDecoder` creation. The advantage is that
180
- # seeking is still as accurate as with ``seek_mode="exact"``.
176
+ # In addition to the instantiation speedup over ``seek_mode="exact"``, using custom frame mappings
177
+ # also retains the benefit of exact metadata and frame seeking.
178
+ #
181
179
182
180
print ("Metadata of short video with custom_frame_mappings:" )
183
181
with open (short_json_path , "r" ) as f :
@@ -202,9 +200,8 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
202
200
#
203
201
# Custom frame mappings contain the same frame index information
204
202
# that would normally be computed during the :term:`scan` operation in exact mode.
205
- # (frame presentation timestamps (PTS), durations, and keyframe indicators)
206
203
# By providing this information to the :class:`~torchcodec.decoders.VideoDecoder`
207
- # as a JSON, it eliminates the need for the expensive scan while preserving all the
204
+ # as JSON, we eliminate the need for the expensive scan while preserving the
208
205
# accuracy benefits.
209
206
#
210
207
# Which mode should I use?
0 commit comments