From 3e0c9d063a31772a398cc025e7a174eaf320c664 Mon Sep 17 00:00:00 2001 From: Borewit Date: Mon, 17 Jan 2022 15:39:09 +0100 Subject: [PATCH] Handle invalid (odd octet length) UTF-16 string Register a warning instead of a fatal error. Resolves Borewit/music-metadata#979 --- lib/common/Util.ts | 21 ++++++++++----------- lib/id3v2/FrameParser.ts | 7 ++++++- test/samples/mp3/issue-979.mp3 | Bin 0 -> 8153 bytes test/test-file-mp3.ts | 28 ++++++++++++++++++++++------ 4 files changed, 38 insertions(+), 18 deletions(-) create mode 100644 test/samples/mp3/issue-979.mp3 diff --git a/lib/common/Util.ts b/lib/common/Util.ts index 7377f0a42..dba5789b2 100644 --- a/lib/common/Util.ts +++ b/lib/common/Util.ts @@ -9,6 +9,11 @@ export type StringEncoding = | 'latin1' // Same as ISO-8859-1 (alias: 'binary') | 'hex'; +export interface ITextEncoding { + encoding: StringEncoding; + bom?: boolean; +} + export function getBit(buf: Uint8Array, off: number, bit: number): boolean { return (buf[off] & (1 << bit)) !== 0; } @@ -56,26 +61,20 @@ function swapBytes(uint8Array: T): T { /** - * - * @param buffer Decoder input data - * @param encoding 'utf16le' | 'utf16' | 'utf8' | 'iso-8859-1' - * @return {string} + * Decode string */ export function decodeString(buffer: Buffer, encoding: StringEncoding): string { // annoying workaround for a double BOM issue // https://github.com/leetreveil/musicmetadata/issues/84 - let offset = 0; if (buffer[0] === 0xFF && buffer[1] === 0xFE) { // little endian - if (encoding === 'utf16le') { - offset = 2; - } else if (buffer[2] === 0xFE && buffer[3] === 0xFF) { - offset = 2; // Clear double BOM - } + return decodeString(buffer.subarray(2), encoding); } else if (encoding === 'utf16le' && buffer[0] === 0xFE && buffer[1] === 0xFF) { // BOM, indicating big endian decoding + if ((buffer.length & 1) !== 0) + throw new Error('Expected even number of octets for 16-bit unicode string'); return decodeString(swapBytes(buffer), encoding); } - return buffer.toString(encoding, offset); + return buffer.toString(encoding); } export function stripNulls(str: string): string { diff --git a/lib/id3v2/FrameParser.ts b/lib/id3v2/FrameParser.ts index 3be811cbb..e3c0add7e 100644 --- a/lib/id3v2/FrameParser.ts +++ b/lib/id3v2/FrameParser.ts @@ -100,7 +100,12 @@ export class FrameParser { case 'MVNM': case 'PCS': case 'PCST': - const text = util.decodeString(b.slice(1), encoding).replace(/\x00+$/, ''); + let text; + try { + text = util.decodeString(b.subarray(1), encoding).replace(/\x00+$/, ''); + } catch (error) { + this.warningCollector.addWarning(`id3v2.${this.major} type=${type} header has invalid string value: ${error.message}`); + } switch (type) { case 'TMCL': // Musician credits list case 'TIPL': // Involved people list diff --git a/test/samples/mp3/issue-979.mp3 b/test/samples/mp3/issue-979.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e9813a0fe580d0f49a1f9df3ebe7b50efc4709b4 GIT binary patch literal 8153 zcmeI1K}!Nr5Xb)_K}JF6E-g`=Qg$`#&>>e{R8k9lgdM`3x`?EkYzo=QPgD0kUWD81 zDj{OA&^yjsW@cyJ-`VBIfD5mbPem~WcJ4*lssh-}ATsj+13J=VbO~`|8U7ZT*|%S$ z)3IHxDBF*=Fw0r0Zjsfc(;khbJ0W%Q7j;SaJKJQHp5ydv?O3bWqL#a&N|a1D#GO^6 zn+>w0uBS!5mRm||1v5|SW(Jp2AhB01iAJMAm3=bT9`@y=D|=3JJhEsahcRU% zst*V#Kq3o068Xh1xOjz4S%VoA9A@T`JrhtvfJaUZNA41u0FPW~R4i0O zfJaUZNA41u0FPW~R4i0OfJaUZNA41u0FPW~R4i0OfJaUZNA41u0FPW~R4i0OK$wN4 dGwwA#S5DGb6hEiwEA@HmAB}5kcNWEn_YH$sqN@M^ literal 0 HcmV?d00001 diff --git a/test/test-file-mp3.ts b/test/test-file-mp3.ts index 4d4be6738..72cc451e7 100644 --- a/test/test-file-mp3.ts +++ b/test/test-file-mp3.ts @@ -148,7 +148,7 @@ describe('Parse MP3 files', () => { // https://github.com/Borewit/music-metadata/issues/398 it('Handle empty picture tag', async () => { - const filePath = path.join(samplePath, 'mp3', 'empty-picture-tag.mp3'); + const filePath = path.join(mp3SamplePath, 'empty-picture-tag.mp3'); const {format, common, quality} = await mm.parseFile(filePath); assert.strictEqual(format.container, 'MPEG', 'format.container'); @@ -162,10 +162,26 @@ describe('Parse MP3 files', () => { assert.includeDeepMembers(quality.warnings, [{message: 'Empty picture tag found'}], 'quality.warnings includes Empty picture tag found'); }); + // https://github.com/Borewit/music-metadata/issues/979 + it('Handle odd number of octets for 16 bit unicide string', async () => { + const filePath = path.join(mp3SamplePath, 'issue-979.mp3'); // TLEN as invalid encode 16 bit unicode string + + const {format, common, quality} = await mm.parseFile(filePath, {duration: true}); + assert.strictEqual(format.container, 'MPEG', 'format.container'); + assert.strictEqual(format.codec, 'MPEG 1 Layer 3', 'format.codec'); + + assert.strictEqual(common.title, 'Minnie & Me', 'common.title'); + assert.strictEqual(common.artist, 'Alexander Hacke', 'common.artist'); + assert.strictEqual(common.album, 'Sanctuary', 'common.album'); + assert.strictEqual(common.year, 2005, 'common.year'); + + assert.includeDeepMembers(quality.warnings, [{message: 'id3v2.3 type=TLEN header has invalid string value: Expected even number of octets for 16-bit unicode string'}], 'Warning on invalid TLEN field'); + }); + // https://github.com/Borewit/music-metadata/issues/430 it('Handle preceeding ADTS frame with (invalid) frame length of 0 bytes', async () => { - const filePath = path.join(samplePath, 'mp3', 'adts-0-frame.mp3'); + const filePath = path.join(mp3SamplePath, 'adts-0-frame.mp3'); const {format, common} = await mm.parseFile(filePath, {duration: true}); @@ -184,7 +200,7 @@ describe('Parse MP3 files', () => { it('Able to handle corrupt LAME header', async () => { - const filePath = path.join(samplePath, 'mp3', 'issue-554.mp3'); + const filePath = path.join(mp3SamplePath, 'issue-554.mp3'); const {format, quality} = await mm.parseFile(filePath, {duration: true}); @@ -221,7 +237,7 @@ describe('Parse MP3 files', () => { describe('MP3/CBR without Xing header', () => { - const filePath = path.join(samplePath, 'mp3', 'Sleep Away.mp3'); + const filePath = path.join(mp3SamplePath, 'Sleep Away.mp3'); describe('duration=false', () => { @@ -264,7 +280,7 @@ describe('Parse MP3 files', () => { it('should be able to parse APEv1 header"', async () => { - const filePath = path.join(samplePath, 'mp3', 'issue-362.apev1.mp3'); + const filePath = path.join(mp3SamplePath, 'issue-362.apev1.mp3'); const {format, common} = await mm.parseFile(filePath, {duration: true}); @@ -284,7 +300,7 @@ describe('Parse MP3 files', () => { it('should be able to parse APEv2 header followed by a Lyrics3v2 header', async () => { - const filePath = path.join(samplePath, 'mp3', 'APEv2+Lyrics3v2.mp3'); + const filePath = path.join(mp3SamplePath, 'APEv2+Lyrics3v2.mp3'); const metadata = await mm.parseFile(filePath); assert.strictEqual(metadata.format.container, 'MPEG');