videojs · gesinger · Jun 8, 2022 · May 27, 2022 · Jun 1, 2022 · Jun 1, 2022
diff --git a/lib/m2ts/metadata-stream.js b/lib/m2ts/metadata-stream.js
@@ -12,6 +12,14 @@
 var
   Stream = require('../utils/stream'),
   StreamTypes = require('./stream-types'),
+  // Frames that allow different types of text encoding carry a text encoding
+  // description byte; these are its possible values [ID3v2.4.0 section 4.]
+  textEncodingDescriptionByte = {
+    Iso88591: 0x00, // ISO-8859-1, terminated with \0.
+    Utf16:    0x01, // UTF-16 encoded Unicode with BOM, terminated with \0\0
+    Utf16be:  0x02, // UTF-16BE encoded Unicode, without BOM, terminated with \0\0
+    Utf8:     0x03  // UTF-8 encoded Unicode, terminated with \0
+  },
   // return a percent-encoded representation of the specified byte range
   // @see http://en.wikipedia.org/wiki/Percent-encoding
   percentEncode = function(bytes, start, end) {
@@ -38,9 +46,67 @@ var
             (data[3]);
   },
   tagParsers = {
-    TXXX: function(tag) {
+    // parse an attached picture frame into mimeType, pictureType, description and
+    // either url (linked image) or pictureData [ID3v2.4.0 section 4.14.]
+    'APIC': function(tag) {
+      var i = 1, // index 0 holds the text encoding description byte
+        mimeTypeEndIndex,
+        descriptionEndIndex,
+        LINK_MIME_TYPE = '-->';
+
+      if (tag.data[0] !== textEncodingDescriptionByte.Utf8) {
+        // ignore frames with unrecognized character encodings
+        return;
+      }
+
+      mimeTypeEndIndex = tag.data.indexOf(0, i);
+      if (mimeTypeEndIndex < 0) {
+        // malformed frame: the MIME type field is not terminated
+        return;
+      }
+
+      // parsing Mime type field (terminated with \0)
+      tag.mimeType = parseIso88591(tag.data, i, mimeTypeEndIndex);
+      i = mimeTypeEndIndex + 1;
+
+      // parsing 1-byte Picture Type field
+      tag.pictureType = tag.data[i];
+      i++;
+
+      descriptionEndIndex = tag.data.indexOf(0, i);
+      if (descriptionEndIndex < 0) {
+        // malformed frame: unterminated description (mimeType and pictureType stay set)
+        return;
+      }
+
+      // parsing Description field (terminated with \0)
+      tag.description = parseUtf8(tag.data, i, descriptionEndIndex);
+      i = descriptionEndIndex + 1;
+
+      if (tag.mimeType === LINK_MIME_TYPE) {
+        // parsing Picture Data field as URL (always represented as ISO-8859-1 [ID3v2.4.0 section 4.])
+        tag.url = parseIso88591(tag.data, i, tag.data.length);
+      } else {
+        // parsing Picture Data field as binary data
+        tag.pictureData = tag.data.subarray(i, tag.data.length);
+      }
+    },
+    'T*': function(tag) {
+      if (tag.data[0] !== textEncodingDescriptionByte.Utf8) {
+        // ignore frames with unrecognized character encodings
+        return;
+      }
+
+      // generic text frame parser: value is the text after the encoding byte, without trailing
+      // null terminators; frames that allow different encodings contain terminated text [ID3v2.4.0 section 4.]
+      tag.value = parseUtf8(tag.data, 1, tag.data.length).replace(/\0*$/, '');
+
+      // text information frames support multiple strings, stored as a terminator separated list [ID3v2.4.0 section 4.2.]
+      tag.values = tag.value.split('\0');
+    },
+    'TXXX': function(tag) {
       var i;
-      if (tag.data[0] !== 3) {
+      if (tag.data[0] !== textEncodingDescriptionByte.Utf8) {
         // ignore frames with unrecognized character encodings
         return;
       }
@@ -50,15 +116,21 @@ var
           // parse the text fields
           tag.description = parseUtf8(tag.data, 1, i);
           // do not include the null terminator in the tag value
+          // frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
           tag.value = parseUtf8(tag.data, i + 1, tag.data.length).replace(/\0*$/, '');
           break;
         }
       }
       tag.data = tag.value;
     },
-    WXXX: function(tag) {
+    'W*': function(tag) {
+      // generic URL link frame parser; URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
+      // everything from the first string termination on is ignored, line breaks included [ID3v2.4.0 section 4.3]
+      tag.url = parseIso88591(tag.data, 0, tag.data.length).replace(/\0[\s\S]*$/, '');
+    },
+    'WXXX': function(tag) {
       var i;
-      if (tag.data[0] !== 3) {
+      if (tag.data[0] !== textEncodingDescriptionByte.Utf8) {
         // ignore frames with unrecognized character encodings
         return;
       }
@@ -67,12 +139,14 @@ var
         if (tag.data[i] === 0) {
           // parse the description and URL fields
           tag.description = parseUtf8(tag.data, 1, i);
-          tag.url = parseUtf8(tag.data, i + 1, tag.data.length);
+          // URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
+          // if the value is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
+          tag.url = parseIso88591(tag.data, i + 1, tag.data.length).replace(/\0.*$/, '');
           break;
         }
       }
     },
-    PRIV: function(tag) {
+    'PRIV': function(tag) {
       var i;
 
       for (i = 0; i < tag.data.length; i++) {
@@ -215,34 +289,44 @@ MetadataStream = function(options) {
         data: tag.data.subarray(frameStart + 10, frameStart + frameSize + 10)
       };
       frame.key = frame.id;
+
+      // parse frame values
       if (tagParsers[frame.id]) {
+        // use frame specific parser
         tagParsers[frame.id](frame);
+      } else if (frame.id[0] === 'T') {
+        // use text frame generic parser
+        tagParsers['T*'](frame);
+      } else if (frame.id[0] === 'W') {
+        // use URL link frame generic parser
+        tagParsers['W*'](frame);
+      }
+
+      // handle the special PRIV frame used to indicate the start
+      // time for raw AAC data
+      if (frame.owner === 'com.apple.streaming.transportStreamTimestamp') {
+        var
+          d = frame.data,
+          size = ((d[3] & 0x01)  << 30) |
+                  (d[4]  << 22) |
+                  (d[5] << 14) |
+                  (d[6] << 6) |
+                  (d[7] >>> 2);
 
-        // handle the special PRIV frame used to indicate the start
-        // time for raw AAC data
-        if (frame.owner === 'com.apple.streaming.transportStreamTimestamp') {
-          var
-            d = frame.data,
-            size = ((d[3] & 0x01)  << 30) |
-                   (d[4]  << 22) |
-                   (d[5] << 14) |
-                   (d[6] << 6) |
-                   (d[7] >>> 2);
-
-          size *= 4;
-          size += d[7] & 0x03;
-          frame.timeStamp = size;
-          // in raw AAC, all subsequent data will be timestamped based
-          // on the value of this frame
-          // we couldn't have known the appropriate pts and dts before
-          // parsing this ID3 tag so set those values now
-          if (tag.pts === undefined && tag.dts === undefined) {
-            tag.pts = frame.timeStamp;
-            tag.dts = frame.timeStamp;
-          }
-          this.trigger('timestamp', frame);
+        size *= 4;
+        size += d[7] & 0x03;
+        frame.timeStamp = size;
+        // in raw AAC, all subsequent data will be timestamped based
+        // on the value of this frame
+        // we couldn't have known the appropriate pts and dts before
+        // parsing this ID3 tag so set those values now
+        if (tag.pts === undefined && tag.dts === undefined) {
+          tag.pts = frame.timeStamp;
+          tag.dts = frame.timeStamp;
         }
+        this.trigger('timestamp', frame);
       }
+
       tag.frames.push(frame);
 
       frameStart += 10; // advance past the frame header

diff --git a/test/metadata-stream.test.js b/test/metadata-stream.test.js
@@ -536,10 +536,48 @@ QUnit.test('should skip tag frame parsing on malformed frame, preserving previou
   })
 
   assert.equal(events.length, 1, 'parsed 1 tag')
-  assert.equal(events[0].frames.length, 1, 'parsed one frame');
+  assert.equal(events[0].frames.length, 1, 'parsed 1 frame');
   assert.equal(events[0].frames[0].key, 'TIT2');
 });
 
+QUnit.test('can parse APIC frame in web worker', function(assert) {
+  var worker = new MetadataStreamTestWorker(),
+      done = assert.async();
+
+  worker.addEventListener('message', function(e) {
+    assert.equal(e.data.frames[0].key, 'APIC', 'frame key is APIC');
+    assert.equal(e.data.frames[0].mimeType, 'image/jpeg', 'parsed MIME type is "image/jpeg"');
+    assert.equal(e.data.frames[0].pictureType, 0x03, 'parsed picture type is 0x03');
+    assert.equal(e.data.frames[0].description, 'sample description', 'parsed description');
+    assert.deepEqual(e.data.frames[0].pictureData, new Uint8Array(stringToInts("picture binary data")), 'parsed picture data');
+    assert.equal(e.data.frames[1].key, 'APIC', 'frame key is APIC');
+    assert.equal(e.data.frames[1].mimeType, '-->', 'parsed MIME type is "-->"');
+    assert.equal(e.data.frames[1].pictureType, 0x04, 'parsed picture type is 0x04');
+    assert.equal(e.data.frames[1].description, 'sample description 2', 'parsed description');
+    assert.equal(e.data.frames[1].url, 'http://example.org/cover-back.jpg', 'parsed picture URL');
+    worker.terminate();
+    done();
+  });
+
+  worker.postMessage({
+    type: 'timed-metadata',
+    data: new Uint8Array(id3Tag(id3Frame('APIC',
+                                         0x03, // Text encoding: UTF-8
+                                         stringToCString('image/jpeg'), // MIME type + \0
+                                         0x03, // Picture type: Cover (front) [ID3v2.4.0 section 4.14]
+                                         stringToCString('sample description'), // Description + \0
+                                         stringToInts('picture binary data')
+                                         ),
+                                id3Frame('APIC',
+                                         0x03, // Text encoding: UTF-8
+                                         stringToCString('-->'), // MIME type: link to the image [ID3v2.4.0 section 4.14] + \0
+                                         0x04, // Picture type: Cover (back) [ID3v2.4.0 section 4.14]
+                                         stringToCString('sample description 2'), // Description + \0
+                                         stringToInts('http://example.org/cover-back.jpg')
+                                         )))
+  });
+});
+
 QUnit.test('can parse PRIV frames in web worker', function(assert) {
   var payload = stringToInts('arbitrary'),
       worker = new MetadataStreamTestWorker(),
@@ -591,6 +629,60 @@ QUnit.test('can parse TXXX frames in web worker', function(assert) {
   });
 });
 
+QUnit.test('should parse text frames in web worker', function(assert) {
+  var worker = new MetadataStreamTestWorker(),
+      done = assert.async();
+
+  worker.addEventListener('message', function(e) {
+    assert.equal(e.data.frames.length, 2, 'got 2 frames');
+    assert.equal(e.data.frames[0].key, 'TIT2', 'frame key is TIT2');
+    assert.equal(e.data.frames[0].value, 'sample song title', 'parsed value');
+    assert.equal(e.data.frames[0].values.length, 1, 'parsed value is an array of size 1');
+    assert.equal(e.data.frames[0].values[0], 'sample song title', 'parsed a non multiple strings value');
+    assert.equal(e.data.frames[1].key, 'TIT3', 'frame key is TIT3');
+    assert.equal(e.data.frames[1].value, 'sample title 1\0sample title 2', 'parsed value');
+    assert.equal(e.data.frames[1].values.length, 2, 'parsed value is an array of size 2');
+    assert.equal(e.data.frames[1].values[0], 'sample title 1', 'parsed 1st multiple strings value');
+    assert.equal(e.data.frames[1].values[1], 'sample title 2', 'parsed 2nd multiple strings value');
+    worker.terminate();
+    done();
+  });
+
+  worker.postMessage({
+    type: 'timed-metadata',
+    data: new Uint8Array(id3Tag(id3Frame('TIT2',
+                                          0x03, // utf-8
+                                          // frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
+                                          stringToCString('sample song title')),
+                                id3Frame('TIT3',
+                                          0x03, // utf-8
+                                          // frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
+                                          // text information frames support multiple strings, stored as a terminator separated list [ID3v2.4.0 section 4.2.]
+                                          stringToCString('sample title 1'), stringToCString('sample title 2'))))
+  });
+});
+
+QUnit.test('should parse URL link frames in web worker', function(assert) {
+  var worker = new MetadataStreamTestWorker(),
+      done = assert.async(),
+      payloadBytes;
+
+  // if the payload is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
+  payloadBytes = stringToInts('http://example.org\0 ignored \0 part');
+
+  worker.addEventListener('message', function(e) {
+    assert.equal(e.data.frames[0].key, 'WOAF', 'frame key is WOAF');
+    assert.equal(e.data.frames[0].url, 'http://example.org', 'parsed URL');
+    worker.terminate();
+    done();
+  });
+
+  worker.postMessage({
+    type: 'timed-metadata',
+    data: new Uint8Array(id3Tag(id3Frame('WOAF', payloadBytes)))
+  });
+});
+
 QUnit.test('triggers special event after parsing a timestamp ID3 tag', function(assert) {
   var
     array = new Uint8Array(73),