Skip to content

Commit

Permalink
More robust key frame index setting (#489)
Browse files Browse the repository at this point in the history
  • Loading branch information
NicolasHug authored Jan 31, 2025
1 parent eb88cd6 commit 93fff37
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 12 deletions.
23 changes: 11 additions & 12 deletions src/torchcodec/decoders/_core/VideoDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ void VideoDecoder::scanFileAndUpdateMetadataAndIndex() {
// we have scanned all packets and sorted by pts.
FrameInfo frameInfo = {packet->pts};
if (packet->flags & AV_PKT_FLAG_KEY) {
frameInfo.isKeyFrame = true;
streamInfos_[streamIndex].keyFrames.push_back(frameInfo);
}
streamInfos_[streamIndex].allFrames.push_back(frameInfo);
Expand Down Expand Up @@ -658,25 +659,23 @@ void VideoDecoder::scanFileAndUpdateMetadataAndIndex() {
return frameInfo1.pts < frameInfo2.pts;
});

size_t keyIndex = 0;
size_t keyFrameIndex = 0;
for (size_t i = 0; i < streamInfo.allFrames.size(); ++i) {
streamInfo.allFrames[i].frameIndex = i;

// For correctly encoded files, we shouldn't need to ensure that keyIndex
// is less than the number of key frames. That is, the relationship
// between the frames in allFrames and keyFrames should be such that
// keyIndex is always a valid index into keyFrames. But we're being
// defensive in case we encounter incorrectly encoded files.
if (keyIndex < streamInfo.keyFrames.size() &&
streamInfo.keyFrames[keyIndex].pts == streamInfo.allFrames[i].pts) {
streamInfo.keyFrames[keyIndex].frameIndex = i;
++keyIndex;
if (streamInfo.allFrames[i].isKeyFrame) {
TORCH_CHECK(
keyFrameIndex < streamInfo.keyFrames.size(),
"The allFrames vec claims it has MORE keyFrames than the keyFrames vec. There's a bug in torchcodec.");
streamInfo.keyFrames[keyFrameIndex].frameIndex = i;
++keyFrameIndex;
}

if (i + 1 < streamInfo.allFrames.size()) {
streamInfo.allFrames[i].nextPts = streamInfo.allFrames[i + 1].pts;
}
}
TORCH_CHECK(
keyFrameIndex == streamInfo.keyFrames.size(),
"The allFrames vec claims it has LESS keyFrames than the keyFrames vec. There's a bug in torchcodec.");
}

scannedAllStreams_ = true;
Expand Down
7 changes: 7 additions & 0 deletions src/torchcodec/decoders/_core/VideoDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,13 +294,20 @@ class VideoDecoder {
// FrameInfo structs with *increasing* nextPts values. That's a necessary
// condition for the binary searches on those values to work properly (as
// typically done during pts -> index conversions).
// TODO: This field is unset (left to the default) for entries in the
// keyFrames vec!
int64_t nextPts = INT64_MAX;

// Note that frameIndex is ALWAYS the index into all of the frames in that
// stream, even when the FrameInfo is part of the key frame index. Given a
// FrameInfo for a key frame, the frameIndex allows us to know which frame
// that is in the stream.
int64_t frameIndex = 0;

// Indicates whether a frame is a key frame. It may appear redundant as it's
// only true for FrameInfos in the keyFrames index, but it is needed to
// correctly map frames between allFrames and keyFrames during the scan.
bool isKeyFrame = false;
};

struct FilterGraphContext {
Expand Down

0 comments on commit 93fff37

Please sign in to comment.