Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ID3: add parsing text, link and APIC frames #412

Merged
merged 7 commits into from
Jun 8, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 113 additions & 29 deletions lib/m2ts/metadata-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
var
Stream = require('../utils/stream'),
StreamTypes = require('./stream-types'),
// Frames that allow different types of text encoding contain a text
// encoding description byte [ID3v2.4.0 section 4.]
// Maps each encoding name to the value of that byte. The frame parsers
// visible in this file compare a frame's first data byte against `Utf8`
// only and ignore frames that use any other encoding.
textEncodingDescriptionByte = {
Iso88591: 0x00, // ISO-8859-1, terminated with \0.
Utf16: 0x01, // UTF-16 encoded Unicode with BOM, terminated with \0\0
Utf16be: 0x02, // UTF-16BE encoded Unicode, without BOM, terminated with \0\0
Utf8: 0x03 // UTF-8 encoded Unicode, terminated with \0
},
// return a percent-encoded representation of the specified byte range
// @see http://en.wikipedia.org/wiki/Percent-encoding
percentEncode = function(bytes, start, end) {
Expand All @@ -38,9 +46,67 @@ var
(data[3]);
},
tagParsers = {
TXXX: function(tag) {
'APIC': function(tag) {
var
i = 1,
mimeTypeEndIndex,
descriptionEndIndex,
LINK_MIME_TYPE = '-->';

if (tag.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

// parsing fields [ID3v2.4.0 section 4.14.]
mimeTypeEndIndex = tag.data.indexOf(0, i);
if (mimeTypeEndIndex < 0) {
// malformed frame
return;
}

// parsing Mime type field (terminated with \0)
tag.mimeType = parseIso88591(tag.data, i, mimeTypeEndIndex);
i = mimeTypeEndIndex + 1;

// parsing 1-byte Picture Type field
tag.pictureType = tag.data[i];
i++

descriptionEndIndex = tag.data.indexOf(0, i);
if (descriptionEndIndex < 0) {
// malformed frame
return;
}

// parsing Description field (terminated with \0)
tag.description = parseUtf8(tag.data, i, descriptionEndIndex);
i = descriptionEndIndex + 1;

if (tag.mimeType === LINK_MIME_TYPE) {
// parsing Picture Data field as URL (always represented as ISO-8859-1 [ID3v2.4.0 section 4.])
tag.url = parseIso88591(tag.data, i, tag.data.length)
} else {
// parsing Picture Data field as binary data
tag.pictureData = tag.data.subarray(i, tag.data.length);
}
},
'T*': function(tag) {
if (tag.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

// parse text field, do not include null terminator in the tag value
// frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
tag.value = parseUtf8(tag.data, 1, tag.data.length).replace(/\0*$/, '')
pszemus marked this conversation as resolved.
Show resolved Hide resolved

pszemus marked this conversation as resolved.
Show resolved Hide resolved
// text information frames supports multiple strings, stored as a terminator separated list [ID3v2.4.0 section 4.2.]
tag.values = tag.value.split('\0');
},
'TXXX': function(tag) {
var i;
if (tag.data[0] !== 3) {
if (tag.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}
Expand All @@ -50,15 +116,21 @@ var
// parse the text fields
tag.description = parseUtf8(tag.data, 1, i);
// do not include the null terminator in the tag value
// frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
tag.value = parseUtf8(tag.data, i + 1, tag.data.length).replace(/\0*$/, '');
break;
}
}
tag.data = tag.value;
},
WXXX: function(tag) {
'W*': function(tag) {
// parse URL field; URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
// if the value is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
tag.url = parseIso88591(tag.data, 0, tag.data.length).replace(/\0.*$/, '');
},
'WXXX': function(tag) {
var i;
if (tag.data[0] !== 3) {
if (tag.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}
Expand All @@ -67,12 +139,14 @@ var
if (tag.data[i] === 0) {
// parse the description and URL fields
tag.description = parseUtf8(tag.data, 1, i);
tag.url = parseUtf8(tag.data, i + 1, tag.data.length);
// URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
// if the value is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
tag.url = parseIso88591(tag.data, i + 1, tag.data.length).replace(/\0.*$/, '');
break;
}
}
},
PRIV: function(tag) {
'PRIV': function(tag) {
var i;

for (i = 0; i < tag.data.length; i++) {
Expand Down Expand Up @@ -215,34 +289,44 @@ MetadataStream = function(options) {
data: tag.data.subarray(frameStart + 10, frameStart + frameSize + 10)
};
frame.key = frame.id;

// parse frame values
if (tagParsers[frame.id]) {
// use frame specific parser
tagParsers[frame.id](frame);
} else if (frame.id[0] === 'T') {
// use text frame generic parser
tagParsers['T*'](frame);
} else if (frame.id[0] === 'W') {
// use URL link frame generic parser
tagParsers['W*'](frame);
}

// handle the special PRIV frame used to indicate the start
// time for raw AAC data
if (frame.owner === 'com.apple.streaming.transportStreamTimestamp') {
var
d = frame.data,
size = ((d[3] & 0x01) << 30) |
(d[4] << 22) |
(d[5] << 14) |
(d[6] << 6) |
(d[7] >>> 2);

// handle the special PRIV frame used to indicate the start
// time for raw AAC data
if (frame.owner === 'com.apple.streaming.transportStreamTimestamp') {
var
d = frame.data,
size = ((d[3] & 0x01) << 30) |
(d[4] << 22) |
(d[5] << 14) |
(d[6] << 6) |
(d[7] >>> 2);

size *= 4;
size += d[7] & 0x03;
frame.timeStamp = size;
// in raw AAC, all subsequent data will be timestamped based
// on the value of this frame
// we couldn't have known the appropriate pts and dts before
// parsing this ID3 tag so set those values now
if (tag.pts === undefined && tag.dts === undefined) {
tag.pts = frame.timeStamp;
tag.dts = frame.timeStamp;
}
this.trigger('timestamp', frame);
size *= 4;
size += d[7] & 0x03;
frame.timeStamp = size;
// in raw AAC, all subsequent data will be timestamped based
// on the value of this frame
// we couldn't have known the appropriate pts and dts before
// parsing this ID3 tag so set those values now
if (tag.pts === undefined && tag.dts === undefined) {
tag.pts = frame.timeStamp;
tag.dts = frame.timeStamp;
}
this.trigger('timestamp', frame);
}

tag.frames.push(frame);

frameStart += 10; // advance past the frame header
Expand Down
94 changes: 93 additions & 1 deletion test/metadata-stream.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,48 @@ QUnit.test('should skip tag frame parsing on malformed frame, preserving previou
})

assert.equal(events.length, 1, 'parsed 1 tag')
assert.equal(events[0].frames.length, 1, 'parsed one frame');
assert.equal(events[0].frames.length, 1, 'parsed 1 frame');
assert.equal(events[0].frames[0].key, 'TIT2');
});

// Posts an ID3 tag holding two APIC frames (one with embedded picture bytes,
// one linking to a picture with the '-->' MIME type) to the test worker and
// checks the fields of the frames it posts back.
QUnit.test('can parse APIC frame in web worker', function(assert) {
  var worker = new MetadataStreamTestWorker(),
    done = assert.async();

  worker.addEventListener('message', function(e) {
    var frames = e.data.frames;

    // embedded picture
    assert.equal(frames[0].key, 'APIC', 'frame key is APIC');
    assert.equal(frames[0].mimeType, 'image/jpeg', 'parsed MIME type is "image/jpeg"');
    assert.equal(frames[0].pictureType, 0x03, 'parsed picture type is 0x03');
    assert.equal(frames[0].description, 'sample description', 'parsed description');
    assert.deepEqual(frames[0].pictureData, new Uint8Array(stringToInts('picture binary data')), 'parsed picture data');

    // linked picture
    assert.equal(frames[1].key, 'APIC', 'frame key is APIC');
    assert.equal(frames[1].mimeType, '-->', 'parsed MIME type is "-->"');
    assert.equal(frames[1].pictureType, 0x04, 'parsed picture type is 0x04');
    assert.equal(frames[1].description, 'sample description 2', 'parsed description');
    assert.equal(frames[1].url, 'http://example.org/cover-back.jpg', 'parsed picture URL');
    worker.terminate();
    done();
  });

  worker.postMessage({
    type: 'timed-metadata',
    data: new Uint8Array(id3Tag(id3Frame('APIC',
                                         0x03, // Text encoding: UTF-8
                                         stringToCString('image/jpeg'), // MIME type + \0
                                         0x03, // Picture type: Cover (front) [ID3v2.4.0 section 4.14]
                                         stringToCString('sample description'), // Description + \0
                                         stringToInts('picture binary data')
                                ),
                                id3Frame('APIC',
                                         0x03, // Text encoding: UTF-8
                                         stringToCString('-->'), // MIME type: link to the image [ID3v2.4.0 section 4.14] + \0
                                         0x04, // Picture type: Cover (back) [ID3v2.4.0 section 4.14]
                                         stringToCString('sample description 2'), // Description + \0
                                         stringToInts('http://example.org/cover-back.jpg')
                                )))
  });
});

QUnit.test('can parse PRIV frames in web worker', function(assert) {
var payload = stringToInts('arbitrary'),
worker = new MetadataStreamTestWorker(),
Expand Down Expand Up @@ -591,6 +629,60 @@ QUnit.test('can parse TXXX frames in web worker', function(assert) {
});
});

// Posts an ID3 tag holding a single-string text frame (TIT2) and a
// multi-string text frame (TIT3) to the test worker and checks the `value`
// and `values` fields of the frames it posts back.
QUnit.test('should parse text frames in web worker', function(assert) {
  var worker = new MetadataStreamTestWorker(),
    done = assert.async();

  worker.addEventListener('message', function(e) {
    var frames = e.data.frames;

    assert.equal(frames.length, 2, 'got 2 frames');
    assert.equal(frames[0].key, 'TIT2', 'frame key is TIT2');
    assert.equal(frames[0].value, 'sample song title', 'parsed value');
    assert.equal(frames[0].values.length, 1, 'parsed value is an array of size 1');
    assert.equal(frames[0].values[0], 'sample song title', 'parsed a non multiple strings value');
    assert.equal(frames[1].key, 'TIT3', 'frame key is TIT3');
    assert.equal(frames[1].value, 'sample title 1\0sample title 2', 'parsed value');
    assert.equal(frames[1].values.length, 2, 'parsed value is an array of size 2');
    assert.equal(frames[1].values[0], 'sample title 1', 'parsed 1st multiple strings value');
    assert.equal(frames[1].values[1], 'sample title 2', 'parsed 2nd multiple strings value');
    worker.terminate();
    done();
  });

  worker.postMessage({
    type: 'timed-metadata',
    data: new Uint8Array(id3Tag(id3Frame('TIT2',
                                         0x03, // utf-8
                                         // frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
                                         stringToCString('sample song title')),
                                id3Frame('TIT3',
                                         0x03, // utf-8
                                         // frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
                                         // text information frames supports multiple strings, stored as a terminator separated list [ID3v2.4.0 section 4.2.]
                                         stringToCString('sample title 1'), stringToCString('sample title 2'))))
  });
});

// Posts an ID3 tag holding a WOAF frame whose payload continues past a \0
// terminator to the test worker and checks that the frame it posts back
// carries only the URL that precedes the terminator.
QUnit.test('should parse URL link frames in web worker', function(assert) {
  var worker = new MetadataStreamTestWorker(),
    done = assert.async(),
    payloadBytes;

  // if the payload is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
  payloadBytes = stringToInts('http://example.org\0 ignored \0 part');

  worker.addEventListener('message', function(e) {
    assert.equal(e.data.frames[0].key, 'WOAF', 'frame key is WOAF');
    assert.equal(e.data.frames[0].url, 'http://example.org', 'parsed URL');
    worker.terminate();
    done();
  });

  worker.postMessage({
    type: 'timed-metadata',
    data: new Uint8Array(id3Tag(id3Frame('WOAF', payloadBytes)))
  });
});

QUnit.test('triggers special event after parsing a timestamp ID3 tag', function(assert) {
var
array = new Uint8Array(73),
Expand Down