diff --git a/lib/Open/directory.js b/lib/Open/directory.js index 2b7e9cf..ee8da80 100644 --- a/lib/Open/directory.js +++ b/lib/Open/directory.js @@ -1,17 +1,14 @@ -var binary = require('binary'); var PullStream = require('../PullStream'); -var unzip = require('./unzip'); var Promise = require('bluebird'); var BufferStream = require('../BufferStream'); +var read = require('../read'); var signature = Buffer(4); signature.writeUInt32LE(0x06054b50,0); module.exports = function centralDirectory(source) { var endDir = PullStream(), - records = PullStream(), - self = this, - vars; + records = PullStream(); return source.size() .then(function(size) { @@ -19,64 +16,34 @@ module.exports = function centralDirectory(source) { return endDir.pull(signature); }) .then(function() { - return endDir.pull(22); + return read.endOfDirectory(endDir); }) - .then(function(data) { - vars = binary.parse(data) - .word32lu('signature') - .word16lu('diskNumber') - .word16lu('diskStart') - .word16lu('numberOfRecordsOnDisk') - .word16lu('numberOfRecords') - .word32lu('sizeOfCentralDirectory') - .word32lu('offsetToStartOfCentralDirectory') - .word16lu('commentLength') - .vars; - + .then(function(vars) { + source.stream(vars.offsetToStartOfCentralDirectory).pipe(records); vars.files = Promise.mapSeries(Array(vars.numberOfRecords),function() { - return records.pull(46).then(function(data) { - var vars = binary.parse(data) - .word32lu('signature') - .word16lu('versionMadeBy') - .word16lu('versionsNeededToExtract') - .word16lu('flags') - .word16lu('compressionMethod') - .word16lu('lastModifiedTime') - .word16lu('lastModifiedDate') - .word32lu('crc32') - .word32lu('compressedSize') - .word32lu('uncompressedSize') - .word16lu('fileNameLength') - .word16lu('extraFieldLength') - .word16lu('fileCommentLength') - .word16lu('diskNumber') - .word16lu('internalFileAttributes') - .word32lu('externalFileAttributes') - .word32lu('offsetToLocalFileHeader') - .vars; + return read.directoryFileHeader(records) + .then(function(file) { + file.raw = function(_password) { + var p = source.stream(file.offsetToLocalFileHeader).pipe(PullStream()); + return read.fileStream(p,{password: _password,raw: true}); + }; + + file.stream = function(_password) { + var p = source.stream(file.offsetToLocalFileHeader).pipe(PullStream()); + return read.fileStream(p,{password:_password}); + }; - return records.pull(vars.fileNameLength).then(function(fileName) { - vars.path = fileName.toString('utf8'); - return records.pull(vars.extraFieldLength); - }) - .then(function(extraField) { - return records.pull(vars.fileCommentLength); - }) - .then(function(comment) { - vars.comment = comment; - vars.stream = function(_password) { - return unzip(source, vars.offsetToLocalFileHeader,_password); - }; - vars.buffer = function(_password) { - return BufferStream(vars.stream(_password)); - }; - return vars; - }); + file.buffer = function(_password) { + return BufferStream(file.stream(_password)); + }; + return file; + }); }); + + return Promise.props(vars); }); - return Promise.props(vars); - }); + }; diff --git a/lib/Open/unzip.js b/lib/Open/unzip.js deleted file mode 100644 index db2964d..0000000 --- a/lib/Open/unzip.js +++ /dev/null @@ -1,128 +0,0 @@ -var Promise = require('bluebird'); -var Decrypt = require('../Decrypt'); -var PullStream = require('../PullStream'); -var Stream = require('stream'); -var binary = require('binary'); -var zlib = require('zlib'); - -// Backwards compatibility for node 0.8 -if (!Stream.Writable) - Stream = require('readable-stream'); - -module.exports = function unzip(source,offset,_password) { - var file = PullStream(), - entry = Stream.PassThrough(), - vars; - - var req = source.stream(offset); - req.pipe(file); - - entry.vars = file.pull(30) - .then(function(data) { - var vars = binary.parse(data) - .word32lu('signature') - .word16lu('versionsNeededToExtract') - .word16lu('flags') - .word16lu('compressionMethod') - .word16lu('lastModifiedTime') - .word16lu('lastModifiedDate') - .word32lu('crc32') - .word32lu('compressedSize') - .word32lu('uncompressedSize') - .word16lu('fileNameLength') - .word16lu('extraFieldLength') - .vars; - return file.pull(vars.fileNameLength) - .then(function(fileName) { - vars.fileName = fileName.toString('utf8'); - return file.pull(vars.extraFieldLength); - }) - .then(function(extraField) { - var extra = binary.parse(extraField) - .word16lu('signature') - .word16lu('partsize') - .word64lu('uncompressedSize') - .word64lu('compressedSize') - .word64lu('offset') - .word64lu('disknum') - .vars, - checkEncryption; - - if (vars.compressedSize === 0xffffffff) - vars.compressedSize = extra.compressedSize; - - if (vars.uncompressedSize === 0xffffffff) - vars.uncompressedSize= extra.uncompressedSize; - - if (vars.flags & 0x01) checkEncryption = file.pull(12) - .then(function(header) { - if (!_password) - throw new Error('MISSING_PASSWORD'); - - var decrypt = Decrypt(); - - String(_password).split('').forEach(function(d) { - decrypt.update(d); - }); - - for (var i=0; i < header.length; i++) - header[i] = decrypt.decryptByte(header[i]); - - vars.decrypt = decrypt; - vars.compressedSize -= 12; - - var check = (vars.flags & 0x8) ? (vars.lastModifiedTime >> 8) & 0xff : (vars.crc32 >> 24) & 0xff; - if (header[11] !== check) - throw new Error('BAD_PASSWORD'); - - return vars; - }); - - return Promise.resolve(checkEncryption) - .then(function() { - entry.emit('vars',vars); - return vars; - }); - }); - }); - - entry.vars.then(function(vars) { - var fileSizeKnown = !(vars.flags & 0x08), - eof; - - var inflater = vars.compressionMethod ? zlib.createInflateRaw() : Stream.PassThrough(); - - if (fileSizeKnown) { - entry.size = vars.uncompressedSize; - eof = vars.compressedSize; - } else { - eof = new Buffer(4); - eof.writeUInt32LE(0x08074b50, 0); - } - - var stream = file.stream(eof); - - if (vars.decrypt) - stream = stream.pipe(vars.decrypt.stream()); - - stream - .pipe(inflater) - .on('error',function(err) { entry.emit('error',err);}) - .pipe(entry) - .on('finish', function() { - if (req.abort) - req.abort(); - else if (req.close) - req.close(); - else if (req.push) - req.push(); - else - console.log('warning - unable to close stream'); - }); - }) - .catch(function(e) { - entry.emit('error',e); - }); - - return entry; -}; \ No newline at end of file diff --git a/lib/PullStream.js b/lib/PullStream.js index c66bddb..863be73 100644 --- a/lib/PullStream.js +++ b/lib/PullStream.js @@ -33,7 +33,7 @@ PullStream.prototype._write = function(chunk,e,cb) { // otherwise (i.e. buffer) it is interpreted as a pattern signaling end of stream PullStream.prototype.stream = function(eof,includeEof) { var p = Stream.PassThrough(); - var count = 0,done,packet,self= this; + var done,packet,self= this; function pull() { if (self.buffer && self.buffer.length) { @@ -61,13 +61,12 @@ PullStream.prototype.stream = function(eof,includeEof) { } if (!done) { - if (self.finished && !this.__ended) { + if (self.finished && !self.__ended) { self.removeListener('chunk',pull); self.emit('error','FILE_ENDED'); this.__ended = true; return; } - } else { self.removeListener('chunk',pull); p.end(); diff --git a/lib/parse.js b/lib/parse.js index df1bb8f..0220fff 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -1,11 +1,8 @@ var util = require('util'); -var zlib = require('zlib'); var Stream = require('stream'); -var binary = require('binary'); var Promise = require('bluebird'); var PullStream = require('./PullStream'); -var NoopStream = require('./NoopStream'); -var BufferStream = require('./BufferStream'); +var read = require('./read'); // Backwards compatibility for node 0.8 if (!Stream.Writable) @@ -36,21 +33,26 @@ Parse.prototype._readRecord = function () { if (data.length === 0) return; + // Read signature and put back on buffer var signature = data.readUInt32LE(0); + self.buffer = Buffer.concat([data,self.buffer]); - if (signature === 0x04034b50) { + if (signature === 0x04034b50) return self._readFile(); - } else if (signature === 0x02014b50) { self.__ended = true; - return self._readCentralDirectoryFileHeader(); - } - else if (signature === 0x06054b50) { - return self._readEndOfCentralDirectoryRecord(); + return read.directoryFileHeader(self) + .then(function(vars) { + return self._readRecord(); + }); } - else if (self.__ended) { + else if (signature === 0x06054b50 || self.__ended) { return self.pull(endDirectorySignature).then(function() { - return self._readEndOfCentralDirectoryRecord(); + return read.endOfDirectory(self); + }) + .then(function() { + self.end(); + self.push(null); }); } else @@ -60,182 +62,43 @@ Parse.prototype._readRecord = function () { Parse.prototype._readFile = function () { var self = this; - self.pull(26).then(function(data) { - var vars = binary.parse(data) - .word16lu('versionsNeededToExtract') - .word16lu('flags') - .word16lu('compressionMethod') - .word16lu('lastModifiedTime') - .word16lu('lastModifiedDate') - .word32lu('crc32') - .word32lu('compressedSize') - .word32lu('uncompressedSize') - .word16lu('fileNameLength') - .word16lu('extraFieldLength') - .vars; - - return self.pull(vars.fileNameLength).then(function(fileName) { - fileName = fileName.toString('utf8'); - var entry = Stream.PassThrough(); - - entry.autodrain = function() { - return new Promise(function(resolve,reject) { - entry.pipe(NoopStream()); - entry.on('finish',resolve); - entry.on('error',reject); + var entry = read.fileStream(self,self._opts); + + entry.vars.then(function(vars) { + entry.vars = vars; + var fileSizeKnown = !(vars.flags & 0x08); + entry.path = vars.path; + entry.props = {}; + entry.props.path = vars.fileName; + entry.type = (vars.compressedSize === 0 && /[\/\\]$/.test(vars.path)) ? 'Directory' : 'File'; + + entry.autodrain = function() { + entry.autodraining = true; + return new Promise(function(resolve,reject) { + entry.on('finish',resolve); + entry.on('error',reject); + }); + }; + + self.emit('entry',entry); + + if (self._readableState.pipesCount) + self.push(entry); + + entry + .on('error',function(err) { self.emit('error',err);}) + .on('finish', function() { + Promise.resolve(!fileSizeKnown && read.dataDescriptor(self)) + .then(function() { + return self._readRecord(); }); - }; - - entry.buffer = function() { - return BufferStream(entry); - }; - - entry.path = fileName; - entry.props = {}; - entry.props.path = fileName; - entry.type = (vars.compressedSize === 0 && /[\/\\]$/.test(fileName)) ? 'Directory' : 'File'; - - if (self._opts.verbose) { - if (entry.type === 'Directory') { - console.log(' creating:', fileName); - } else if (entry.type === 'File') { - if (vars.compressionMethod === 0) { - console.log(' extracting:', fileName); - } else { - console.log(' inflating:', fileName); - } - } - } - - self.pull(vars.extraFieldLength).then(function(extraField) { - var extra = binary.parse(extraField) - .word16lu('signature') - .word16lu('partsize') - .word64lu('uncompressedSize') - .word64lu('compressedSize') - .word64lu('offset') - .word64lu('disknum') - .vars; - - if (vars.compressedSize === 0xffffffff) - vars.compressedSize = extra.compressedSize; - if (vars.uncompressedSize === 0xffffffff) - vars.uncompressedSize= extra.uncompressedSize; - - entry.vars = vars; - entry.extra = extra; - - self.emit('entry', entry); - - if (self._readableState.pipesCount) - self.push(entry); - - if (self._opts.verbose) - console.log({ - filename:fileName, - vars: vars, - extra: extra - }); - - var fileSizeKnown = !(vars.flags & 0x08), - eof; - - var inflater = vars.compressionMethod ? zlib.createInflateRaw() : Stream.PassThrough(); - - if (fileSizeKnown) { - entry.size = vars.uncompressedSize; - eof = vars.compressedSize; - } else { - eof = new Buffer(4); - eof.writeUInt32LE(0x08074b50, 0); - } - - self.stream(eof) - .pipe(inflater) - .on('error',function(err) { self.emit('error',err);}) - .pipe(entry) - .on('finish', function() { - return fileSizeKnown ? self._readRecord() : self._processDataDescriptor(entry); - }); }); - }); }); -}; -Parse.prototype._processDataDescriptor = function (entry) { - var self = this; - self.pull(16).then(function(data) { - var vars = binary.parse(data) - .word32lu('dataDescriptorSignature') - .word32lu('crc32') - .word32lu('compressedSize') - .word32lu('uncompressedSize') - .vars; - - entry.size = vars.uncompressedSize; - self._readRecord(); - }); }; -Parse.prototype._readCentralDirectoryFileHeader = function () { - var self = this; - self.pull(42).then(function(data) { - - var vars = binary.parse(data) - .word16lu('versionMadeBy') - .word16lu('versionsNeededToExtract') - .word16lu('flags') - .word16lu('compressionMethod') - .word16lu('lastModifiedTime') - .word16lu('lastModifiedDate') - .word32lu('crc32') - .word32lu('compressedSize') - .word32lu('uncompressedSize') - .word16lu('fileNameLength') - .word16lu('extraFieldLength') - .word16lu('fileCommentLength') - .word16lu('diskNumber') - .word16lu('internalFileAttributes') - .word32lu('externalFileAttributes') - .word32lu('offsetToLocalFileHeader') - .vars; - - return self.pull(vars.fileNameLength).then(function(fileName) { - vars.fileName = fileName.toString('utf8'); - return self.pull(vars.extraFieldLength); - }) - .then(function(extraField) { - return self.pull(vars.fileCommentLength); - }) - .then(function(fileComment) { - return self._readRecord(); - }); - }); -}; -Parse.prototype._readEndOfCentralDirectoryRecord = function() { - var self = this; - self.pull(18).then(function(data) { - - var vars = binary.parse(data) - .word16lu('diskNumber') - .word16lu('diskStart') - .word16lu('numberOfRecordsOnDisk') - .word16lu('numberOfRecords') - .word32lu('sizeOfCentralDirectory') - .word32lu('offsetToStartOfCentralDirectory') - .word16lu('commentLength') - .vars; - - self.pull(vars.commentLength).then(function(comment) { - comment = comment.toString('utf8'); - self.end(); - self.push(null); - }); - - }); -}; Parse.prototype.promise = function() { var self = this; diff --git a/lib/parseOne.js b/lib/parseOne.js index be8103b..52077a7 100644 --- a/lib/parseOne.js +++ b/lib/parseOne.js @@ -8,6 +8,7 @@ if (!Stream.Writable) Stream = require('readable-stream'); function parseOne(match,opts) { + opts = opts || {}; var inStream = Stream.PassThrough({objectMode:true}); var outStream = Stream.PassThrough(); var transform = Stream.Transform({objectMode:true}); @@ -15,6 +16,7 @@ function parseOne(match,opts) { var found; transform._transform = function(entry,e,cb) { + entry.password = opts.password; if (found || (re && !re.exec(entry.path))) { entry.autodrain(); return cb(); @@ -55,4 +57,5 @@ function parseOne(match,opts) { return out; } + module.exports = parseOne; \ No newline at end of file diff --git a/lib/read/dataDescriptor.js b/lib/read/dataDescriptor.js new file mode 100644 index 0000000..4851568 --- /dev/null +++ b/lib/read/dataDescriptor.js @@ -0,0 +1,12 @@ +var binary = require('binary'); + +module.exports = function(p) { + return p.pull(16).then(function(data) { + return binary.parse(data) + .word32lu('dataDescriptorSignature') + .word32lu('crc32') + .word32lu('compressedSize') + .word32lu('uncompressedSize') + .vars; + }); +}; \ No newline at end of file diff --git a/lib/read/directoryFileHeader.js b/lib/read/directoryFileHeader.js new file mode 100644 index 0000000..272b95f --- /dev/null +++ b/lib/read/directoryFileHeader.js @@ -0,0 +1,37 @@ +var binary = require('binary'); + +module.exports = function file(p) { + return p.pull(46).then(function(data) { + var vars = binary.parse(data) + .word32lu('signature') + .word16lu('versionMadeBy') + .word16lu('versionsNeededToExtract') + .word16lu('flags') + .word16lu('compressionMethod') + .word16lu('lastModifiedTime') + .word16lu('lastModifiedDate') + .word32lu('crc32') + .word32lu('compressedSize') + .word32lu('uncompressedSize') + .word16lu('fileNameLength') + .word16lu('extraFieldLength') + .word16lu('fileCommentLength') + .word16lu('diskNumber') + .word16lu('internalFileAttributes') + .word32lu('externalFileAttributes') + .word32lu('offsetToLocalFileHeader') + .vars; + + return p.pull(vars.fileNameLength).then(function(fileName) { + vars.path = fileName.toString('utf8'); + return p.pull(vars.extraFieldLength); + }) + .then(function(extraField) { + return p.pull(vars.fileCommentLength); + }) + .then(function(fileComment) { + vars.comment = fileComment.toString('utf-8'); + return vars; + }); + }); +}; \ No newline at end of file diff --git a/lib/read/endOfDirectory.js b/lib/read/endOfDirectory.js new file mode 100644 index 0000000..172e555 --- /dev/null +++ b/lib/read/endOfDirectory.js @@ -0,0 +1,22 @@ +var binary = require('binary'); + +module.exports = function(p) { + return p.pull(22).then(function(data) { + + var vars = binary.parse(data) + .word32lu('signature') + .word16lu('diskNumber') + .word16lu('diskStart') + .word16lu('numberOfRecordsOnDisk') + .word16lu('numberOfRecords') + .word32lu('sizeOfCentralDirectory') + .word32lu('offsetToStartOfCentralDirectory') + .word16lu('commentLength') + .vars; + + return p.pull(vars.commentLength).then(function(comment) { + vars.comment = comment.toString('utf8'); + return vars; + }); + }); +}; \ No newline at end of file diff --git a/lib/read/fileStream.js b/lib/read/fileStream.js new file mode 100644 index 0000000..1a2a4fe --- /dev/null +++ b/lib/read/fileStream.js @@ -0,0 +1,120 @@ +var Decrypt = require('../Decrypt'); +var NoopStream = require('../NoopStream'); +var BufferStream = require('../BufferStream'); + +var Stream = require('stream'); +var binary = require('binary'); +var zlib = require('zlib'); + +// Backwards compatibility for node 0.8 +if (!Stream.Writable) + Stream = require('readable-stream'); + +module.exports = function unzip(p,opts) { + var vars, entry = Stream.PassThrough(); + opts = opts || {}; + + entry.buffer = function() { + return BufferStream(entry); + }; + + entry.vars = p.pull(30) + .then(function(data) { + vars = binary.parse(data) + .word32lu('signature') + .word16lu('versionsNeededToExtract') + .word16lu('flags') + .word16lu('compressionMethod') + .word16lu('lastModifiedTime') + .word16lu('lastModifiedDate') + .word32lu('crc32') + .word32lu('compressedSize') + .word32lu('uncompressedSize') + .word16lu('fileNameLength') + .word16lu('extraFieldLength') + .vars; + return p.pull(vars.fileNameLength); + }) + .then(function(fileName) { + vars.path = fileName.toString('utf8'); + if (opts.raw) vars.path+='.gz'; + return p.pull(vars.extraFieldLength); + }) + .then(function(extraField) { + var extra = binary.parse(extraField) + .word16lu('signature') + .word16lu('partsize') + .word64lu('uncompressedSize') + .word64lu('compressedSize') + .word64lu('offset') + .word64lu('disknum') + .vars; + + if (vars.compressedSize === 0xffffffff) + vars.compressedSize = extra.compressedSize; + + if (vars.uncompressedSize === 0xffffffff) + vars.uncompressedSize= extra.uncompressedSize; + return vars; + }); + + entry.vars.then(function(vars) { + if (vars.flags & 0x01) { + vars.compressedSize -= 12; + return p.pull(12); + } + }) + .then(function(header) { + var fileSizeKnown = !(vars.flags & 0x08), + eof; + + if (fileSizeKnown) { + entry.size = vars.uncompressedSize; + eof = vars.compressedSize; + } else { + eof = new Buffer(4); + eof.writeUInt32LE(0x08074b50, 0); + } + + var stream = entry.source = p.stream(eof); + + if (entry.autodraining) + return stream.pipe(NoopStream()).pipe(entry); + + if (header) { + var _password = opts.password || entry.password; + if (!_password && !entry.autodraining) + throw new Error('MISSING_PASSWORD'); + + var decrypt = Decrypt(); + + String(_password).split('').forEach(function(d) { + decrypt.update(d); + }); + + for (var i=0; i < header.length; i++) + header[i] = decrypt.decryptByte(header[i]); + + vars.decrypt = decrypt; + vars.compressedSize -= 12; + + var check = (vars.flags & 0x8) ? (vars.lastModifiedTime >> 8) & 0xff : (vars.crc32 >> 24) & 0xff; + if (header[11] !== check && !entry.autodraining) + throw new Error('BAD_PASSWORD'); + } + + var inflater = (!vars.compressionMethod || opts.raw) ? Stream.PassThrough() : zlib.createInflateRaw(); + + if (vars.decrypt) + stream = stream.pipe(vars.decrypt.stream()); + + return stream + .pipe(inflater) + .on('error',function(err) { entry.emit('error',err);}) + .pipe(entry); + }) + .catch(function(e) { + entry.emit('error',e); + }); + return entry; +}; \ No newline at end of file diff --git a/lib/read/index.js b/lib/read/index.js new file mode 100644 index 0000000..8e3fdd4 --- /dev/null +++ b/lib/read/index.js @@ -0,0 +1,6 @@ +module.exports = { + directoryFileHeader : require('./directoryFileHeader'), + endOfDirectory: require('./endOfDirectory'), + fileStream : require('./fileStream'), + dataDescriptor: require('./dataDescriptor') +}; \ No newline at end of file diff --git a/package.json b/package.json index 4ae784f..8ef16c9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "unzipper", - "version": "0.8.10", + "version": "0.9.0-rc8", "description": "Unzip cross-platform streaming API ", "author": "Evan Oxfeld ", "contributors": [ diff --git a/test/parseEncrypted.js b/test/parseEncrypted.js new file mode 100644 index 0000000..404c82e --- /dev/null +++ b/test/parseEncrypted.js @@ -0,0 +1,67 @@ +'use strict'; + +var test = require('tap').test; +var fs = require('fs'); +var path = require('path'); +var unzip = require('../'); + +var archive = path.join(__dirname, '../testData/compressed-encrypted/archive.zip'); + +test("get content of a single file entry out of a zip", function (t) { + return fs.createReadStream(archive) + .pipe(unzip.Parse()) + .on('entry',function(entry) { + if (entry.path !== 'file.txt') + return entry.autodrain(); + entry.password = 'abc123'; + entry.buffer().then(function(str) { + var fileStr = fs.readFileSync(path.join(__dirname, '../testData/compressed-standard/inflated/file.txt'), 'utf8'); + t.equal(str.toString(), str.toString()); + t.end(); + }); + }); +}); + + +test("error if password is missing", function (t) { + return fs.createReadStream(archive) + .pipe(unzip.Parse()) + .on('entry',function(entry) { + if (entry.path !== 'file.txt') + return entry.autodrain(); + entry.buffer() + .then(function() { + t.error('should error'); + },function(e) { + t.equal(e.message,'MISSING_PASSWORD'); + }) + .then(function() { + t.end(); + }); + }) + .on('error',Object); +}); + +test("error if password is wrong", function (t) { + return fs.createReadStream(archive) + .pipe(unzip.Parse()) + .on('entry',function(entry) { + if (entry.path !== 'file.txt') + return entry.autodrain(); + + entry.password = 'abc1234'; + + entry.buffer() + .then(function() { + t.error('should error'); + },function(e) { + t.equal(e.message,'BAD_PASSWORD'); + }) + .then(function() { + t.end(); + }); + }) + .on('error',Object); + + +}); \ No newline at end of file