Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update video reader to use new decoder #1978

Merged
merged 10 commits into from
Mar 17, 2020
31 changes: 19 additions & 12 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,17 +152,6 @@ def get_extensions():

include_dirs = [extensions_dir]

ffmpeg_exe = distutils.spawn.find_executable('ffmpeg')
has_ffmpeg = ffmpeg_exe is not None
if has_ffmpeg:
ffmpeg_bin = os.path.dirname(ffmpeg_exe)
ffmpeg_root = os.path.dirname(ffmpeg_bin)
ffmpeg_include_dir = os.path.join(ffmpeg_root, 'include')

# TorchVision video reader
video_reader_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'video_reader')
video_reader_src = glob.glob(os.path.join(video_reader_src_dir, "*.cpp"))

ext_modules = [
extension(
'torchvision._C',
Expand All @@ -182,12 +171,30 @@ def get_extensions():
extra_compile_args=extra_compile_args,
)
)

ffmpeg_exe = distutils.spawn.find_executable('ffmpeg')
has_ffmpeg = ffmpeg_exe is not None

if has_ffmpeg:
ffmpeg_bin = os.path.dirname(ffmpeg_exe)
ffmpeg_root = os.path.dirname(ffmpeg_bin)
ffmpeg_include_dir = os.path.join(ffmpeg_root, 'include')

# TorchVision base decoder + video reader
video_reader_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'video_reader')
video_reader_src = glob.glob(os.path.join(video_reader_src_dir, "*.cpp"))
base_decoder_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'decoder')
base_decoder_src = glob.glob(
os.path.join(base_decoder_src_dir, "[!sync_decoder_test,!utils_test]*.cpp"))

combined_src = video_reader_src + base_decoder_src

ext_modules.append(
CppExtension(
'torchvision.video_reader',
video_reader_src,
combined_src,
include_dirs=[
base_decoder_src_dir,
video_reader_src_dir,
ffmpeg_include_dir,
extensions_dir,
Expand Down
129 changes: 116 additions & 13 deletions test/test_video_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ def compare_decoding_result(self, tv_result, ref_result, config=all_check_config
def test_stress_test_read_video_from_file(self):
num_iter = 10000
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -416,6 +416,7 @@ def test_stress_test_read_video_from_file(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -434,7 +435,7 @@ def test_read_video_from_file(self):
Test the case when decoder starts with a video file to decode frames.
"""
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -454,6 +455,7 @@ def test_read_video_from_file(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -479,7 +481,7 @@ def test_read_video_from_file_read_single_stream_only(self):
only reads video stream and ignores audio stream
"""
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -499,6 +501,7 @@ def test_read_video_from_file_read_single_stream_only(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand Down Expand Up @@ -536,7 +539,7 @@ def test_read_video_from_file_rescale_min_dimension(self):
video min dimension between height and width is set.
"""
# video related
width, height, min_dimension = 0, 0, 128
width, height, min_dimension, max_dimension = 0, 0, 128, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -555,6 +558,7 @@ def test_read_video_from_file_rescale_min_dimension(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -571,13 +575,100 @@ def test_read_video_from_file_rescale_min_dimension(self):
min_dimension, min(tv_result[0].size(1), tv_result[0].size(2))
)

def test_read_video_from_file_rescale_max_dimension(self):
    """
    Test the case when decoder starts with a video file to decode frames, and
    video max dimension between height and width is set.
    """
    # video related
    # Only max_dimension is non-zero; width, height and min_dimension are
    # all passed as 0.
    width, height, min_dimension, max_dimension = 0, 0, 0, 85
    video_start_pts, video_end_pts = 0, -1
    video_timebase_num, video_timebase_den = 0, 1
    # audio related
    samples, channels = 0, 0
    audio_start_pts, audio_end_pts = 0, -1
    audio_timebase_num, audio_timebase_den = 0, 1

    # Only the file names are needed here, so iterate the keys directly.
    for test_video in test_videos:
        full_path = os.path.join(VIDEO_DIR, test_video)

        tv_result = torch.ops.video_reader.read_video_from_file(
            full_path,
            seek_frame_margin,
            0,  # getPtsOnly
            1,  # readVideoStream
            width,
            height,
            min_dimension,
            max_dimension,
            video_start_pts,
            video_end_pts,
            video_timebase_num,
            video_timebase_den,
            1,  # readAudioStream
            samples,
            channels,
            audio_start_pts,
            audio_end_pts,
            audio_timebase_num,
            audio_timebase_den,
        )
        # NOTE(review): presumably tv_result[0] is the decoded video frame
        # tensor with dims 1 and 2 being height and width - confirm against
        # the op's return layout.
        self.assertEqual(
            max_dimension, max(tv_result[0].size(1), tv_result[0].size(2))
        )

def test_read_video_from_file_rescale_both_min_max_dimension(self):
    """
    Test the case when decoder starts with a video file to decode frames, and
    both video min dimension and max dimension between height and width are set.
    """
    # video related
    # Both min_dimension and max_dimension are non-zero; width and height
    # are passed as 0.
    width, height, min_dimension, max_dimension = 0, 0, 64, 85
    video_start_pts, video_end_pts = 0, -1
    video_timebase_num, video_timebase_den = 0, 1
    # audio related
    samples, channels = 0, 0
    audio_start_pts, audio_end_pts = 0, -1
    audio_timebase_num, audio_timebase_den = 0, 1

    # Only the file names are needed here, so iterate the keys directly.
    for test_video in test_videos:
        full_path = os.path.join(VIDEO_DIR, test_video)

        tv_result = torch.ops.video_reader.read_video_from_file(
            full_path,
            seek_frame_margin,
            0,  # getPtsOnly
            1,  # readVideoStream
            width,
            height,
            min_dimension,
            max_dimension,
            video_start_pts,
            video_end_pts,
            video_timebase_num,
            video_timebase_den,
            1,  # readAudioStream
            samples,
            channels,
            audio_start_pts,
            audio_end_pts,
            audio_timebase_num,
            audio_timebase_den,
        )
        # NOTE(review): presumably tv_result[0] is the decoded video frame
        # tensor with dims 1 and 2 being height and width - confirm against
        # the op's return layout.
        frame_dims = (tv_result[0].size(1), tv_result[0].size(2))
        self.assertEqual(min_dimension, min(frame_dims))
        self.assertEqual(max_dimension, max(frame_dims))

def test_read_video_from_file_rescale_width(self):
"""
Test the case when decoder starts with a video file to decode frames, and
video width is set.
"""
# video related
width, height, min_dimension = 256, 0, 0
width, height, min_dimension, max_dimension = 256, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -596,6 +687,7 @@ def test_read_video_from_file_rescale_width(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -616,7 +708,7 @@ def test_read_video_from_file_rescale_height(self):
video height is set.
"""
# video related
width, height, min_dimension = 0, 224, 0
width, height, min_dimension, max_dimension = 0, 224, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -635,6 +727,7 @@ def test_read_video_from_file_rescale_height(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -655,7 +748,7 @@ def test_read_video_from_file_rescale_width_and_height(self):
both video height and width are set.
"""
# video related
width, height, min_dimension = 320, 240, 0
width, height, min_dimension, max_dimension = 320, 240, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -674,6 +767,7 @@ def test_read_video_from_file_rescale_width_and_height(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -697,7 +791,7 @@ def test_read_video_from_file_audio_resampling(self):

for samples in [9600, 96000]: # downsampling # upsampling
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -716,6 +810,7 @@ def test_read_video_from_file_audio_resampling(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand Down Expand Up @@ -752,7 +847,7 @@ def test_compare_read_video_from_memory_and_file(self):
Test the case when video is already in memory, and decoder reads data in memory
"""
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -772,6 +867,7 @@ def test_compare_read_video_from_memory_and_file(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -794,6 +890,7 @@ def test_compare_read_video_from_memory_and_file(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -816,7 +913,7 @@ def test_read_video_from_memory(self):
Test the case when video is already in memory, and decoder reads data in memory
"""
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -836,6 +933,7 @@ def test_read_video_from_memory(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -861,7 +959,7 @@ def test_read_video_from_memory_get_pts_only(self):
for both pts and frame data
"""
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -881,6 +979,7 @@ def test_read_video_from_memory_get_pts_only(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -904,6 +1003,7 @@ def test_read_video_from_memory_get_pts_only(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand All @@ -930,7 +1030,7 @@ def test_read_video_in_range_from_memory(self):
for test_video, config in test_videos.items():
full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand All @@ -946,6 +1046,7 @@ def test_read_video_in_range_from_memory(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand Down Expand Up @@ -1000,6 +1101,7 @@ def test_read_video_in_range_from_memory(self):
width,
height,
min_dimension,
max_dimension,
video_start_pts,
video_end_pts,
video_timebase_num,
Expand Down Expand Up @@ -1099,7 +1201,7 @@ def test_read_video_from_memory_scripted(self):
Test the case when video is already in memory, and decoder reads data in memory
"""
# video related
width, height, min_dimension = 0, 0, 0
width, height, min_dimension, max_dimension = 0, 0, 0, 0
video_start_pts, video_end_pts = 0, -1
video_timebase_num, video_timebase_den = 0, 1
# audio related
Expand Down Expand Up @@ -1130,6 +1232,7 @@ def test_read_video_from_memory_scripted(self):
[audio_start_pts, audio_end_pts],
audio_timebase_num,
audio_timebase_den,
max_dimension,
)
# FUTURE: check value of video / audio frames

Expand Down
Loading