From d8c6c7a6f6ea5b7aaa3bd4141155d37a4efdcbfd Mon Sep 17 00:00:00 2001 From: Rob Walch Date: Wed, 31 May 2023 16:17:25 -0700 Subject: [PATCH] Add mp4 "stsd" codec parsing for Dolby Vision, VP9, AV1, and audio codec passthrough Update contributed mp4a, avc, and hevc mp4 "stsd" codec parsing Clean up mp4 init segment track parsed codec handling Clean up SourceBuffer `changeType` detection and appends --- src/controller/audio-stream-controller.ts | 17 +- src/controller/buffer-controller.ts | 7 +- src/controller/error-controller.ts | 2 +- src/remux/passthrough-remuxer.ts | 29 +- src/utils/mp4-tools.ts | 396 ++++++++-------------- 5 files changed, 184 insertions(+), 267 deletions(-) diff --git a/src/controller/audio-stream-controller.ts b/src/controller/audio-stream-controller.ts index 629448efa36..f3238cec5ae 100644 --- a/src/controller/audio-stream-controller.ts +++ b/src/controller/audio-stream-controller.ts @@ -777,7 +777,12 @@ class AudioStreamController if (initSegment?.tracks) { const mapFragment = frag.initSegment || frag; - this._bufferInitSegment(initSegment.tracks, mapFragment, chunkMeta); + this._bufferInitSegment( + level, + initSegment.tracks, + mapFragment, + chunkMeta + ); hls.trigger(Events.FRAG_PARSING_INIT_SEGMENT, { frag: mapFragment, id, @@ -830,6 +835,7 @@ class AudioStreamController } private _bufferInitSegment( + currentLevel: Level, tracks: TrackSet, frag: Fragment, chunkMeta: ChunkMetadata @@ -848,11 +854,16 @@ class AudioStreamController return; } - track.levelCodec = track.codec; track.id = 'audio'; + + const variantAudioCodecs = currentLevel.audioCodec; this.log( - `Init audio buffer, container:${track.container}, codecs[parsed]=[${track.codec}]` + `Init audio buffer, container:${track.container}, codecs[level/parsed]=[${variantAudioCodecs}/${track.codec}]` ); + // SourceBuffer will use track.levelCodec if defined + if (variantAudioCodecs && variantAudioCodecs.split(',').length === 1) { + track.levelCodec = variantAudioCodecs; + } this.hls.trigger(Events.BUFFER_CODECS, tracks); const initSegment = track.initSegment; if (initSegment?.byteLength) { diff --git a/src/controller/buffer-controller.ts b/src/controller/buffer-controller.ts index c5a41021967..e9906328537 100644 --- a/src/controller/buffer-controller.ts +++ b/src/controller/buffer-controller.ts @@ -30,7 +30,8 @@ import type Hls from '../hls'; import type { LevelDetails } from '../loader/level-details'; const MediaSource = getMediaSource(); -const VIDEO_CODEC_PROFILE_REPACE = /([ha]vc.)(?:\.[^.,]+)+/; +const VIDEO_CODEC_PROFILE_REPLACE = + /(avc[1234]|hvc1|hev1|dvh[1e]|vp09|av01)(?:\.[^.,]+)+/; export default class BufferController implements ComponentAPI { // The level details used to determine duration, target-duration and live @@ -261,11 +262,11 @@ export default class BufferController implements ComponentAPI { const { id, codec, levelCodec, container, metadata } = data[trackName]; const currentCodec = (track.levelCodec || track.codec).replace( - VIDEO_CODEC_PROFILE_REPACE, + VIDEO_CODEC_PROFILE_REPLACE, '$1' ); const nextCodec = (levelCodec || codec).replace( - VIDEO_CODEC_PROFILE_REPACE, + VIDEO_CODEC_PROFILE_REPLACE, '$1' ); if (currentCodec !== nextCodec) { diff --git a/src/controller/error-controller.ts b/src/controller/error-controller.ts index aa86edcbfd7..ef02863cb73 100644 --- a/src/controller/error-controller.ts +++ b/src/controller/error-controller.ts @@ -468,7 +468,7 @@ export default class ErrorController implements NetworkComponentAPI { const { hls, penalizedRenditions } = this; const levelIndex: number = data.parent === PlaylistLevelType.MAIN - ? (data.level as number) + ? (data.level as number) || 0 : hls.loadLevel; const level = hls.levels[levelIndex]; const redundantLevels = level.url.length; diff --git a/src/remux/passthrough-remuxer.ts b/src/remux/passthrough-remuxer.ts index c92d2698e6e..cb66db2ad0b 100644 --- a/src/remux/passthrough-remuxer.ts +++ b/src/remux/passthrough-remuxer.ts @@ -74,14 +74,14 @@ class PassThroughRemuxer implements Remuxer { const initData = (this.initData = parseInitSegment(initSegment)); // Get codec from initSegment or fallback to default - if (!audioCodec) { + if (initData.audio) { audioCodec = getParsedTrackCodec( initData.audio, ElementaryStreamTypes.AUDIO ); } - if (!videoCodec) { + if (initData.video) { videoCodec = getParsedTrackCodec( initData.video, ElementaryStreamTypes.VIDEO @@ -263,27 +263,36 @@ function isInvalidInitPts( } function getParsedTrackCodec( - track: InitDataTrack | undefined, + track: InitDataTrack, type: ElementaryStreamTypes.AUDIO | ElementaryStreamTypes.VIDEO ): string { const parsedCodec = track?.codec; if (parsedCodec && parsedCodec.length > 4) { return parsedCodec; } + if (type === ElementaryStreamTypes.AUDIO) { + if ( + parsedCodec === 'ec-3' || + parsedCodec === 'ac-3' || + parsedCodec === 'alac' + ) { + return parsedCodec; + } + if (parsedCodec === 'fLaC' || parsedCodec === 'Opus') { + return getCodecCompatibleName(parsedCodec); + } + logger.warn(`Unhandled audio codec "${parsedCodec}" or audio object type`); + return 'mp4a.40.5'; + } // Provide defaults based on codec type // This allows for some playback of some fmp4 playlists without CODECS defined in manifest + logger.warn(`Unhandled video codec "${parsedCodec}"`); if (parsedCodec === 'hvc1' || parsedCodec === 'hev1') { return 'hvc1.1.6.L120.90'; } if (parsedCodec === 'av01') { return 'av01.0.04M.08'; } - if (parsedCodec === 'avc1' || type === ElementaryStreamTypes.VIDEO) { - return 'avc1.42e01e'; - } - if (parsedCodec === 'fLaC' || parsedCodec === 'Opus') { - return getCodecCompatibleName(parsedCodec); - } - return 'mp4a.40.5'; + return 'avc1.42e01e'; } export default PassThroughRemuxer; diff --git a/src/utils/mp4-tools.ts b/src/utils/mp4-tools.ts index e6a16c6fb1b..a3c458e8408 100644 --- a/src/utils/mp4-tools.ts +++ b/src/utils/mp4-tools.ts @@ -223,13 +223,11 @@ export function parseInitSegment(initSegment: Uint8Array): InitData { const tkhd = findBox(trak, ['tkhd'])[0]; if (tkhd) { let version = tkhd[0]; - let index = version === 0 ? 12 : 20; - const trackId = readUint32(tkhd, index); + const trackId = readUint32(tkhd, version === 0 ? 12 : 20); const mdhd = findBox(trak, ['mdia', 'mdhd'])[0]; if (mdhd) { version = mdhd[0]; - index = version === 0 ? 12 : 20; - const timescale = readUint32(mdhd, index); + const timescale = readUint32(mdhd, version === 0 ? 12 : 20); const hdlr = findBox(trak, ['mdia', 'hdlr'])[0]; if (hdlr) { const hdlrType = bin2str(hdlr.subarray(8, 12)); @@ -240,126 +238,9 @@ export function parseInitSegment(initSegment: Uint8Array): InitData { if (type) { // Parse codec details const stsd = findBox(trak, ['mdia', 'minf', 'stbl', 'stsd'])[0]; - let codec; - if (stsd) { - codec = bin2str(stsd.subarray(12, 16)); - // Parse codec details to be able to build MIME type - // TODO: Codec parsing support for AV1 - const toHex = (x: number): string => { - return ('0' + x.toString(16).toUpperCase()).slice(-2); - }; - - // Handle H264 - if ( - codec.slice(0, 3) === 'avc' && - codec[3] >= '1' && - codec[3] <= '4' && - stsd.length > 102 && - bin2str(stsd.subarray(98, 102)) === 'avcC' - ) { - // profile + compatibility + level - codec += - '.' + toHex(stsd[111]) + toHex(stsd[112]) + toHex(stsd[113]); - } - - // Handle H265 - else if (isHEVC(codec)) { - // @fogarasyroland's method https://github.com/video-dev/hls.js/pull/5024 - let hvcC; - const codecBox = findBox(stsd.subarray(8), [codec])[0]; - if (codecBox) { - const { end } = parseVisualSampleEntry(codecBox); - hvcC = findBox(codecBox.subarray(end), ['hvcC'])[0]; - if (hvcC) { - codec = mimeTypeBuilderHEVC( - codec, - parseHvcConfigurationRecord(hvcC) - ); - } - } - // @uvjustin's method https://github.com/video-dev/hls.js/pull/4996 - if ( - !hvcC && - stsd.length > 102 && - bin2str(stsd.subarray(98, 102)) === 'hvcC' - ) { - // Profile Space - const profileByte = stsd[103]; - const profileSpace = { 0: '', 1: 'A', 2: 'B', 3: 'C' }[ - profileByte >> 6 - ]; - const generalProfileIdc = profileByte & 31; - codec += '.' + profileSpace + generalProfileIdc; - - // Compatibility - let reversed = 0; - for (let i = 0; i < 4; ++i) { - // byte number - for (let j = 0; j < 8; ++j) { - // bit number - reversed |= - ((stsd[i + 104] >> (7 - j)) & 1) << (31 - 8 * i - j); - } - } - codec += '.' + toHex(reversed >>> 0); - - // Tier Flag - codec += (profileByte & 32 ? '.H' : '.L') + stsd[114]; - - // Constraint String - let hasByte = false; - let constraintString = ''; - for (let i = 113; i > 107; --i) { - if (stsd[i] || hasByte) { - constraintString = - '.' + toHex(stsd[i]) + constraintString; - hasByte = true; - } - } - codec += constraintString; - } - } - - // Handle Audio - else if (codec === 'mp4a') { - // Parse ES Descriptors - let i: number; - // oti - for (i = 0; i < stsd.length - 5; ++i) { - if ( - stsd[i] == 4 && - stsd[i + 1] == 128 && - stsd[i + 2] == 128 && - stsd[i + 3] == 128 - ) { - codec += '.' + toHex(stsd[i + 5]); - break; - } - } - - // dsi - for (i = 0; i < stsd.length - 6; ++i) { - if ( - stsd[i] == 5 && - stsd[i + 1] == 128 && - stsd[i + 2] == 128 && - stsd[i + 3] == 128 - ) { - let dsi = (stsd[i + 5] & 248) >> 3; - if (dsi == 31 && stsd[i + 4] >= 2) { - dsi = - 32 + - ((stsd[i + 5] & 7) << 3) + - ((stsd[i + 6] & 224) >> 5); - } - codec += '.' + dsi; - break; - } - } - } - } + const stsdData = parseStsd(stsd); result[trackId] = { timescale, type }; - result[type] = { timescale, id: trackId, codec }; + result[type] = { timescale, id: trackId, ...stsdData }; } } } @@ -381,6 +262,148 @@ export function parseInitSegment(initSegment: Uint8Array): InitData { return result; } +function parseStsd(stsd: Uint8Array): { codec: string; encrypted: boolean } { + const sampleEntries = stsd.subarray(8); + const sampleEntriesEnd = sampleEntries.subarray(8 + 78); + const fourCC = bin2str(sampleEntries.subarray(4, 8)); + let codec = fourCC; + const encrypted = fourCC === 'enca' || fourCC === 'encv'; + if (encrypted) { + const encBox = findBox(sampleEntries, [fourCC])[0]; + const encBoxChildren = encBox.subarray(fourCC === 'enca' ? 28 : 78); + const sinfs = findBox(encBoxChildren, ['sinf']); + sinfs.forEach((sinf) => { + const schm = findBox(sinf, ['schm'])[0]; + if (schm) { + const scheme = bin2str(schm.subarray(4, 8)); + if (scheme === 'cbcs' || scheme === 'cenc') { + const frma = findBox(sinf, ['frma'])[0]; + if (frma) { + // for encrypted content codec fourCC will be in frma + codec = bin2str(frma); + } + } + } + }); + } + switch (codec) { + case 'avc1': + case 'avc2': + case 'avc3': + case 'avc4': + // profile + compatibility + level + codec += '.' + toHex(stsd[111]) + toHex(stsd[112]) + toHex(stsd[113]); + break; + case 'mp4a': { + const codecBox = findBox(sampleEntries, [fourCC])[0]; + const esdsBox = findBox(codecBox.subarray(28), ['esds'])[0]; + if (esdsBox && esdsBox.length > 12 && esdsBox[11] !== 0) { + codec += '.' + toHex(esdsBox[11]); + codec += '.' + ((esdsBox[12] >>> 2) & 0x3f).toString(16).toUpperCase(); + } + break; + } + // break; + case 'hvc1': + case 'hev1': { + const hvcCBox = findBox(sampleEntriesEnd, ['hvcC'])[0]; + const profileByte = hvcCBox[1]; + const profileSpace = ['', 'A', 'B', 'C'][profileByte >> 6]; + const generalProfileIdc = profileByte & 0x1f; + const profileCompat = readUint32(hvcCBox, 2); + const tierFlag = (profileByte & 0x20) >> 5 ? 'H' : 'L'; + const levelIDC = hvcCBox[12]; + const constraintIndicator = hvcCBox.subarray(6, 12); + codec += '.' + profileSpace + generalProfileIdc; + codec += '.' + profileCompat.toString(16).toUpperCase(); + codec += '.' + tierFlag + levelIDC; + let constraintString = ''; + for (let i = constraintIndicator.length; i--; ) { + const byte = constraintIndicator[i]; + if (byte || constraintString) { + const encodedByte = byte.toString(16).toUpperCase(); + constraintString = '.' + encodedByte + constraintString; + } + } + codec += constraintString; + break; + } + case 'dvh1': + case 'dvhe': { + const dvcCBox = findBox(sampleEntriesEnd, ['dvcC'])[0]; + const profile = (dvcCBox[2] >> 1) & 0x7f; + const level = ((dvcCBox[2] << 5) & 0x20) | ((dvcCBox[3] >> 3) & 0x1f); + codec += '.' + addLeadingZero(profile) + '.' + addLeadingZero(level); + break; + } + case 'vp09': { + const vpcCBox = findBox(sampleEntriesEnd, ['vpcC'])[0]; + const profile = vpcCBox[4]; + const level = vpcCBox[5]; + const bitDepth = (vpcCBox[6] >> 4) & 0x0f; + codec += + '.' + + addLeadingZero(profile) + + '.' + + addLeadingZero(level) + + '.' + + addLeadingZero(bitDepth); + break; + } + case 'av01': { + const av1CBox = findBox(sampleEntriesEnd, ['av1C'])[0]; + const profile = av1CBox[1] >>> 3; + const level = av1CBox[1] & 0x1f; + const tierFlag = av1CBox[2] >>> 7 ? 'H' : 'M'; + const highBitDepth = (av1CBox[2] & 0x40) >> 6; + const twelveBit = (av1CBox[2] & 0x20) >> 5; + const bitDepth = + profile === 2 && highBitDepth + ? twelveBit + ? 12 + : 10 + : highBitDepth + ? 10 + : 8; + const monochrome = (av1CBox[2] & 0x10) >> 4; + const chromaSubsamplingX = (av1CBox[2] & 0x08) >> 3; + const chromaSubsamplingY = (av1CBox[2] & 0x04) >> 2; + const chromaSamplePosition = av1CBox[2] & 0x03; + codec += + '.' + + profile + + '.' + + addLeadingZero(level) + + tierFlag + + '.' + + addLeadingZero(bitDepth) + + '.' + + monochrome + + '.' + + chromaSubsamplingX + + chromaSubsamplingY + + chromaSamplePosition; + break; + } + case 'ac-3': + case 'ec-3': + case 'alac': + case 'fLaC': + case 'Opus': + default: + break; + } + return { codec, encrypted }; +} + +function toHex(x: number): string { + return ('0' + x.toString(16).toUpperCase()).slice(-2); +} + +function addLeadingZero(num: number): string { + return (num < 10 ? '0' : '') + num; +} + export function patchEncyptionData( initSegment: Uint8Array | undefined, decryptdata: DecryptData | null @@ -1261,130 +1284,3 @@ export function parsePssh(initData: ArrayBuffer) { } return result; } - -function mimeTypeBuilderHEVC( - codecName: string, - codecDetails: hvcConfigurationRecord -): string { - const generalProfileSpaceMap = ['', 'A', 'B', 'C']; - let codecMimeType = codecName; - if (codecDetails) { - codecMimeType += '.'; - codecMimeType += generalProfileSpaceMap[codecDetails.generalProfileSpace]; - codecMimeType += codecDetails.generalProfileIdc; - - codecMimeType += '.'; - codecMimeType += codecDetails.generalProfileCompatibility.toString(16)[0]; - - codecMimeType += '.'; - codecMimeType += codecDetails.generalTierFlag === 0 ? 'L' : 'H'; - codecMimeType += codecDetails.generalLevelIdc; - - let constraintString = ''; - const lastByteIndex = ( - codecDetails.generalConstraintIndicator as any - ).findLastIndex((x: Number) => x !== 0); - if (lastByteIndex !== -1) { - constraintString = - '.' + - ( - codecDetails.generalConstraintIndicator.slice( - 0, - lastByteIndex + 1 - ) as any - ) - .map((x: Number) => x.toString(16)) - .join('.'); - } - - codecMimeType += constraintString; - } - return codecMimeType; -} - -function parseDataReferenceIndex(data: Uint8Array) { - // UInt8[6] reserved - return readUint16(data, 6); -} - -interface visualSampleEntry { - dataReferenceIndex: number; - width: number; - height: number; - horizResolution: number; - vertResolution: number; - frameCount: number; - compressorName: string; - depth: number; - end: number; -} - -function parseVisualSampleEntry(data: Uint8Array): visualSampleEntry { - return { - dataReferenceIndex: parseDataReferenceIndex(data), - - // UInt16 preDefined - // UInt16 reserved - // UInt32[3] preDefined - - width: readUint16(data, 24), - height: readUint16(data, 26), - - horizResolution: readUint32(data, 28), // 0x00480000 - 72 dpi - vertResolution: readUint32(data, 32), // 0x00480000 - 72 dpi - - // UInt32 reserved - - frameCount: readUint16(data, 40), - compressorName: bin2str(data.subarray(43, Math.min(data[42], 31))), - depth: readUint16(data, 74), - - // UInt16 preDefined - - end: 78, - }; -} - -interface hvcConfigurationRecord { - configurationVersion: number; - generalProfileSpace: number; - generalTierFlag: number; - generalProfileIdc: number; - generalProfileCompatibility: number; - generalConstraintIndicator: Uint8Array; - generalLevelIdc: number; - minSpatialSegmentationIdc: number; - parallelismType: number; - chromaFormatIdc: number; - bitDepthLumaMinus8: number; - bitDepthChromaMinus8: number; - avgFrameRate: number; - constantFrameRate: number; - numTemporalLayers: number; - temporalIdNested: number; - lengthSizeMinusOne: number; - naluArrays: Array; -} - -function parseHvcConfigurationRecord(data: Uint8Array): hvcConfigurationRecord { - return { - configurationVersion: data[0], - generalProfileSpace: data[1] >> 6, - generalTierFlag: (data[1] & 0x20) >> 5, - generalProfileIdc: data[1] & 0x1f, - generalProfileCompatibility: readUint32(data, 2), - generalConstraintIndicator: data.subarray(6, 12), - generalLevelIdc: data[12], - minSpatialSegmentationIdc: readUint16(data, 13) & 0xfff, - parallelismType: data[15] & 0x3, - chromaFormatIdc: data[16] & 0x3, - bitDepthLumaMinus8: data[17] & 0x7, - bitDepthChromaMinus8: data[18] & 0x7, - avgFrameRate: readUint16(data, 19), - constantFrameRate: data[21] >> 6, - numTemporalLayers: (data[21] & 0xd) >> 3, - temporalIdNested: (data[21] & 0x4) >> 2, - lengthSizeMinusOne: data[21] & 0x3, - naluArrays: [], - }; -}