From 7a0e9ec65577809f17413a6439d7d51c7bfd393c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?A=CC=81lvaro=20Velad=20Galva=CC=81n?= Date: Thu, 2 Nov 2023 12:34:34 +0100 Subject: [PATCH] fix: Fix nalu parsing and improve performance in the transmuxer --- externs/shaka/codecs.js | 8 +- lib/media/media_source_engine.js | 2 +- lib/transmuxer/ts_transmuxer.js | 12 +- lib/util/ts_parser.js | 197 +++++++++++++++++++------------ lib/util/uint8array_utils.js | 6 +- test/util/ts_parser_unit.js | 6 +- 6 files changed, 140 insertions(+), 91 deletions(-) diff --git a/externs/shaka/codecs.js b/externs/shaka/codecs.js index 71bf6d964a..4ead28d7f2 100644 --- a/externs/shaka/codecs.js +++ b/externs/shaka/codecs.js @@ -1,16 +1,18 @@ /** * @typedef {{ - * data: Uint8Array, + * data: !Uint8Array, * packetLength: number, * pts: ?number, - * dts: ?number + * dts: ?number, + * nalus: !Array. * }} * * @summary MPEG_PES. - * @property {Uint8Array} data + * @property {!Uint8Array} data * @property {number} packetLength * @property {?number} pts * @property {?number} dts + * @property {!Array.} nalus */ shaka.extern.MPEG_PES; diff --git a/lib/media/media_source_engine.js b/lib/media/media_source_engine.js index 505e14519a..0ec46a1711 100644 --- a/lib/media/media_source_engine.js +++ b/lib/media/media_source_engine.js @@ -783,7 +783,7 @@ shaka.media.MediaSourceEngine = class { } else if (!mimeType.includes('/mp4') && !mimeType.includes('/webm') && shaka.util.TsParser.probe(uint8ArrayData)) { const tsParser = new shaka.util.TsParser().parse(uint8ArrayData); - const startTime = tsParser.getStartTime()[contentType]; + const startTime = tsParser.getStartTime(contentType); if (startTime != null) { timestamp = startTime; } diff --git a/lib/transmuxer/ts_transmuxer.js b/lib/transmuxer/ts_transmuxer.js index 8f0c8ac9ff..4f6b1dab97 100644 --- a/lib/transmuxer/ts_transmuxer.js +++ b/lib/transmuxer/ts_transmuxer.js @@ -706,11 +706,7 @@ shaka.transmuxer.TsTransmuxer = class { const videoData = tsParser.getVideoData(); for (let i = 0; i < videoData.length; i++) { const pes = videoData[i]; - let nextPes; - if (i + 1 < videoData.length) { - nextPes = videoData[i + 1]; - } - const dataNalus = tsParser.parseNalus(pes, nextPes); + const dataNalus = pes.nalus; nalus = nalus.concat(dataNalus); const frame = H264.parseFrame(dataNalus); if (!frame) { @@ -796,11 +792,7 @@ shaka.transmuxer.TsTransmuxer = class { const videoData = tsParser.getVideoData(); for (let i = 0; i < videoData.length; i++) { const pes = videoData[i]; - let nextPes; - if (i + 1 < videoData.length) { - nextPes = videoData[i + 1]; - } - const dataNalus = tsParser.parseNalus(pes, nextPes); + const dataNalus = pes.nalus; nalus = nalus.concat(dataNalus); const frame = H265.parseFrame(dataNalus); if (!frame) { diff --git a/lib/util/ts_parser.js b/lib/util/ts_parser.js index 50c9fc2856..608f9173bb 100644 --- a/lib/util/ts_parser.js +++ b/lib/util/ts_parser.js @@ -35,6 +35,9 @@ shaka.util.TsParser = class { /** @private {!Array.} */ this.videoData_ = []; + /** @private {!Array.} */ + this.videoPes_ = []; + /** @private {?number} */ this.audioPid_ = null; @@ -44,6 +47,9 @@ shaka.util.TsParser = class { /** @private {!Array.} */ this.audioData_ = []; + /** @private {!Array.} */ + this.audioPes_ = []; + /** @private {?number} */ this.id3Pid_ = null; @@ -58,7 +64,9 @@ shaka.util.TsParser = class { */ clearData() { this.videoData_ = []; + this.videoPes_ = []; this.audioData_ = []; + this.audioPes_ = []; this.id3Data_ = []; } @@ -360,6 +368,7 @@ shaka.util.TsParser = class { packetLength: ((data[4] << 8) | data[5]), pts: null, dts: null, + nalus: [], }; // if PES parsed length is not zero and greater than total received length, @@ -430,7 +439,7 @@ shaka.util.TsParser = class { shaka.Deprecate.deprecateFeature(5, 'TsParser', 'Please use parseNalus function instead.'); - return this.parseNalus(pes, nextPes); + return this.parseNalus(pes); } /** @@ -440,12 +449,11 @@ shaka.util.TsParser = class { * Credit to https://github.com/video-dev/hls.js/blob/master/src/demux/tsdemuxer.ts * * @param {shaka.extern.MPEG_PES} pes - * @param {?shaka.extern.MPEG_PES=} nextPes * @param {?shaka.extern.VideoNalu=} lastNalu * @return {!Array.} * @export */ - parseNalus(pes, nextPes, lastNalu) { + parseNalus(pes, lastNalu) { const timescale = shaka.util.TsParser.Timescale; const time = pes.pts ? pes.pts / timescale : null; const data = pes.data; @@ -479,21 +487,21 @@ shaka.util.TsParser = class { const value = data[i]; if (!value) { numZeros++; - } else if (numZeros >= 2 && value == 1 && lastNalu) { - // If we are scanning the next PES, we need append the data to the - // previous Nalu and don't scan for more nalus. - const startCodeSize = numZeros > 3 ? 3 : numZeros; - const lastByteToKeep = i - startCodeSize; - // Optimization - if (lastByteToKeep == 0) { - return []; - } - lastNalu.data = shaka.util.Uint8ArrayUtils.concat( - lastNalu.data, data.subarray(0, lastByteToKeep)); - lastNalu.fullData = shaka.util.Uint8ArrayUtils.concat( - lastNalu.fullData, data.subarray(0, lastByteToKeep)); - return []; } else if (numZeros >= 2 && value == 1) { + if (lastNalu && !nalus.length && lastNaluStart == -1) { + // If we are scanning the next PES, we need append the data to the + // previous Nalu and don't scan for more nalus. + const startCodeSize = numZeros > 3 ? 3 : numZeros; + const lastByteToKeep = i - startCodeSize; + // Optimization + if (lastByteToKeep != 0) { + const prevData = data.subarray(0, lastByteToKeep); + lastNalu.data = shaka.util.Uint8ArrayUtils.concat( + lastNalu.data, prevData); + lastNalu.fullData = shaka.util.Uint8ArrayUtils.concat( + lastNalu.fullData, prevData); + } + } // We just read a start code. Consume the NALU we passed, if any. if (lastNaluStart >= 0) { // Because the start position includes the header size. @@ -513,11 +521,14 @@ shaka.util.TsParser = class { time: time, }; nalus.push(nalu); - } else if (lastNalu) { + } else if (lastNalu && !nalus.length) { const overflow = i - numZeros; if (overflow > 0) { + const prevData = data.subarray(0, overflow); lastNalu.data = shaka.util.Uint8ArrayUtils.concat( - lastNalu.data, data.subarray(0, overflow)); + lastNalu.data, prevData); + lastNalu.fullData = shaka.util.Uint8ArrayUtils.concat( + lastNalu.fullData, prevData); } } @@ -553,9 +564,6 @@ shaka.util.TsParser = class { type: lastNaluType, time: time, }; - if (nextPes) { - this.parseNalus(nextPes, /* nextPes= */ null, infoOfLastNalu); - } } } @@ -602,73 +610,122 @@ shaka.util.TsParser = class { * @export */ getAudioData() { - const audio = []; - for (const audioData of this.audioData_) { - const pes = this.parsePES_(audioData); - if (pes) { - audio.push(pes); + if (this.audioData_.length && !this.audioPes_.length) { + let sort = false; + for (const audioData of this.audioData_) { + const pes = this.parsePES_(audioData); + if (pes && pes.pts != null && pes.dts != null) { + if (this.audioPes_.length && + pes.dts < (this.audioPes_[this.audioPes_.length - 1].dts || 0)) { + sort = true; + } + this.audioPes_.push(pes); + } else if (this.audioPes_.length) { + const data = pes ? pes.data : audioData; + if (!data) { + continue; + } + const previousPes = this.audioPes_.pop(); + previousPes.data = + shaka.util.Uint8ArrayUtils.concat(previousPes.data, data); + this.audioPes_.push(previousPes); + } + } + if (sort) { + this.audioPes_ = this.audioPes_.sort((a, b) => { + const deltadts = (a.dts || 0) - (b.dts || 0); + const deltapts = (a.pts || 0) - (b.pts || 0); + return deltadts || deltapts; + }); } } - return audio; + return this.audioPes_; } /** * Return the audio data * + * @param {boolean=} naluProcessing * @return {!Array.} * @export */ - getVideoData() { - const Uint8ArrayUtils = shaka.util.Uint8ArrayUtils; - const video = []; - for (const videoData of this.videoData_) { - const pes = this.parsePES_(videoData); - if (pes && pes.pts != null && pes.dts != null) { - video.push(pes); - } else if (video.length) { - const data = pes ? pes.data : videoData; - if (!data) { - continue; + getVideoData(naluProcessing = true) { + if (this.videoData_.length && !this.videoPes_.length) { + let sort = false; + for (const videoData of this.videoData_) { + const pes = this.parsePES_(videoData); + if (pes && pes.pts != null && pes.dts != null) { + if (this.videoPes_.length && + pes.dts < (this.videoPes_[this.videoPes_.length - 1].dts || 0)) { + sort = true; + } + this.videoPes_.push(pes); + } else if (this.videoPes_.length) { + const data = pes ? pes.data : videoData; + if (!data) { + continue; + } + const previousPes = this.videoPes_.pop(); + previousPes.data = + shaka.util.Uint8ArrayUtils.concat(previousPes.data, data); + this.videoPes_.push(previousPes); } - const previousPes = video.pop(); - previousPes.data = - Uint8ArrayUtils.concat(previousPes.data, data); - video.push(previousPes); } + if (naluProcessing) { + let lastNalu; + for (const pes of this.videoPes_) { + pes.nalus = this.parseNalus(pes, lastNalu); + if (pes.nalus.length) { + lastNalu = pes.nalus[pes.nalus.length - 1]; + } + } + this.videoPes_ = this.videoPes_.filter((pes) => { + return pes.nalus.length; + }); + } + if (sort) { + this.videoPes_ = this.videoPes_.sort((a, b) => { + const deltadts = (a.dts || 0) - (b.dts || 0); + const deltapts = (a.pts || 0) - (b.pts || 0); + return deltadts || deltapts; + }); + } + } + if (!naluProcessing) { + const prevVideoPes = this.videoPes_; + this.videoPes_ = []; + return prevVideoPes; } - return video; + return this.videoPes_; } /** * Return the start time for the audio and video * - * @return {{audio: ?number, video: ?number}} + * @param {string} contentType + * @return {?number} * @export */ - getStartTime() { + getStartTime(contentType) { const timescale = shaka.util.TsParser.Timescale; - let audioStartTime = null; - for (const pes of this.getAudioData()) { - if (pes && pes.pts != null) { - const startTime = Math.min(pes.dts, pes.pts) / timescale; - if (audioStartTime == null || audioStartTime > startTime) { - audioStartTime = startTime; - } + if (contentType == 'audio') { + let audioStartTime = null; + const audioData = this.getAudioData(); + if (audioData.length) { + const pes = audioData[0]; + audioStartTime = Math.min(pes.dts, pes.pts) / timescale; } - } - let videoStartTime = null; - for (const pes of this.getVideoData()) { - if (pes && pes.pts != null) { - const startTime = Math.min(pes.dts, pes.pts) / timescale; - if (videoStartTime == null || videoStartTime > startTime) { - videoStartTime = startTime; - } + return audioStartTime; + } else if (contentType == 'video') { + let videoStartTime = null; + const videoData = this.getVideoData(/* naluProcessing= */ false); + if (videoData.length) { + const pes = videoData[0]; + videoStartTime = Math.min(pes.dts, pes.pts) / timescale; } + return videoStartTime; } - return { - audio: audioStartTime, - video: videoStartTime, - }; + return null; } /** @@ -692,14 +749,8 @@ shaka.util.TsParser = class { */ getVideoNalus() { const nalus = []; - const videoData = this.getVideoData(); - for (let i = 0; i < videoData.length; i++) { - const pes = videoData[i]; - let nextPes; - if (i + 1 < videoData.length) { - nextPes = videoData[i + 1]; - } - nalus.push(...this.parseNalus(pes, nextPes)); + for (const pes of this.getVideoData()) { + nalus.push(...pes.nalus); } return nalus; } diff --git a/lib/util/uint8array_utils.js b/lib/util/uint8array_utils.js index 87c3761022..acf44b4cd5 100644 --- a/lib/util/uint8array_utils.js +++ b/lib/util/uint8array_utils.js @@ -115,7 +115,11 @@ shaka.util.Uint8ArrayUtils = class { const result = new Uint8Array(totalLength); let offset = 0; for (const arr of varArgs) { - result.set(shaka.util.BufferUtils.toUint8(arr), offset); + if (arr instanceof Uint8Array) { + result.set(arr, offset); + } else { + result.set(shaka.util.BufferUtils.toUint8(arr), offset); + } offset += arr.byteLength; } return result; diff --git a/test/util/ts_parser_unit.js b/test/util/ts_parser_unit.js index cffa9e26af..3190e094ec 100644 --- a/test/util/ts_parser_unit.js +++ b/test/util/ts_parser_unit.js @@ -7,6 +7,7 @@ describe('TsParser', () => { const Util = shaka.test.Util; const BufferUtils = shaka.util.BufferUtils; + const ContentType = shaka.util.ManifestParserUtils.ContentType; it('probes a TS segment', async () => { const responses = await Promise.all([ @@ -59,9 +60,8 @@ describe('TsParser', () => { ]); const tsSegment = BufferUtils.toUint8(responses[0]); const starttime = new shaka.util.TsParser().parse(tsSegment) - .getStartTime(); - expect(starttime.audio).toBeCloseTo(90019.586, 3); - expect(starttime.video).toBe(null); + .getStartTime(ContentType.AUDIO); + expect(starttime).toBeCloseTo(90019.586, 3); }); it('get the codecs from a TS segment', async () => {