Skip to content

Commit

Permalink
Feature: detect archives with bad extensions with file signatures (#1121)
Browse files Browse the repository at this point in the history
  • Loading branch information
emmercm authored May 9, 2024
1 parent c32113f commit dbd343d
Show file tree
Hide file tree
Showing 10 changed files with 251 additions and 44 deletions.
2 changes: 1 addition & 1 deletion src/modules/romIndexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ export default class ROMIndexer extends Module {
}

/**
* This ordering should match {@link FileFactory#entriesFrom}
* This ordering should match {@link FileFactory#entriesFromArchiveExtension}
*/
private static archiveEntryPriority(file: File): number {
if (!(file instanceof ArchiveEntry)) {
Expand Down
7 changes: 6 additions & 1 deletion src/types/files/archives/rar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ import Archive from './archive.js';
import ArchiveEntry from './archiveEntry.js';

export default class Rar extends Archive {
static readonly SUPPORTED_EXTENSIONS = ['.rar'];
static readonly SUPPORTED_FILES: [string[], Buffer[]][] = [
[['.rar'], [
Buffer.from('526172211A0700', 'hex'), // v1.50+
Buffer.from('526172211A070100', 'hex'), // v5.00+
]],
];

private static readonly EXTRACT_MUTEX = new Mutex();

Expand Down
70 changes: 53 additions & 17 deletions src/types/files/archives/sevenZip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,59 @@ import ArchiveEntry from './archiveEntry.js';
export default class SevenZip extends Archive {
// p7zip `7za i`
// WARNING: tar+compression doesn't work, you'll be left with a tar file output
static readonly SUPPORTED_EXTENSIONS = [
'.7z', // 7z
// '.bz2', '.bzip2', // bzip2
// '.cab', // cab
'.gz', '.gzip', // gzip
// '.lzma', // lzma
// '.lzma86', // lzma86
// '.pmd', // ppmd
'.zip.001', // split
// '.tar', '.ova', // tar
// '.xz', // xz
'.z', // z
'.zip', '.z01', '.zipx', // zip
// '.zst', // zstd
// '.lz4', // lz4
// '.lz5', // lz5
// '.liz', // lizard
static readonly SUPPORTED_FILES: [string[], Buffer[]][] = [
[['.7z'], [
Buffer.from('377ABCAF271C', 'hex'),
]],
// [['.bz2', '.bzip2'], [
// 'BZh',
// ]],
// [['.cab'], [
// 'MSCF',
// ]],
[['.gz', '.gzip'], [
Buffer.from('1F8B', 'hex'),
]],
// [['.lzma'], [
// // ???
// ]],
// [['.lzma86'], [
// // ???
// ]],
// [['.pmd'], [
// // ???
// ]],
// [['.tar', '.ova'], [
// Buffer.from('7573746172003030', 'hex'),
// Buffer.from('7573746172202000', 'hex'),
// ]],
// [['.xz'], [
// Buffer.from('FD377A585A00', 'hex'), // LZMA2 compression
// ]],
[['.z'], [
Buffer.from('1F9D', 'hex'), // LZW compression
Buffer.from('1FA0', 'hex'), // LZH compression
]],
[['.zip', '.zip.001', '.z01'], [
Buffer.from('504B0304', 'hex'),
Buffer.from('504B0506', 'hex'), // empty archive
Buffer.from('504B0708', 'hex'), // spanned archive
]],
[['.zipx', '.zx01'], [
// ???
]],
// [['.zst'], [
// Buffer.from('28B52FFD', 'hex'),
// ]],
// [['.lz4'], [
// Buffer.from('04224D18', 'hex'),
// ]],
// [['.lz5'], [
// // ???
// ]],
// [['.liz'], [
// // ???
// ]],
];

private static readonly LIST_MUTEX = new Mutex();
Expand Down
11 changes: 8 additions & 3 deletions src/types/files/archives/tar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,14 @@ import Archive from './archive.js';
import ArchiveEntry from './archiveEntry.js';

export default class Tar extends Archive {
static readonly SUPPORTED_EXTENSIONS = [
'.tar',
'.tar.gz', '.tgz',
static readonly SUPPORTED_FILES: [string[], Buffer[]][] = [
[['.tar'], [
Buffer.from('7573746172003030', 'hex'),
Buffer.from('7573746172202000', 'hex'),
]],
[['.tar.gz', '.tgz'], [
Buffer.from('1F8B', 'hex'),
]],
];

// eslint-disable-next-line class-methods-use-this
Expand Down
7 changes: 6 additions & 1 deletion src/types/files/archives/zip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@ import Archive from './archive.js';
import ArchiveEntry from './archiveEntry.js';

export default class Zip extends Archive {
static readonly SUPPORTED_EXTENSIONS = ['.zip'];
static readonly SUPPORTED_FILES: [string[], Buffer[]][] = [
[['.zip'], [
Buffer.from('504B0304', 'hex'),
Buffer.from('504B0506', 'hex'), // empty archive
]],
];

// eslint-disable-next-line class-methods-use-this
protected new(filePath: string): Archive {
Expand Down
104 changes: 92 additions & 12 deletions src/types/files/fileFactory.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import fs from 'node:fs';
import path from 'node:path';

import Archive from './archives/archive.js';
Expand All @@ -15,12 +16,16 @@ export default class FileFactory {
filePath: string,
checksumBitmask: number = ChecksumBitmask.CRC32,
): Promise<File[]> {
if (!this.isArchive(filePath)) {
if (!this.isExtensionArchive(filePath)) {
const entries = await this.entriesFromArchiveSignature(filePath, checksumBitmask);
if (entries !== undefined) {
return entries;
}
return [await this.fileFrom(filePath, checksumBitmask)];
}

try {
return await this.entriesFrom(filePath, checksumBitmask);
return await this.entriesFromArchiveExtension(filePath, checksumBitmask);
} catch (error) {
if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') {
throw new Error(`file doesn't exist: ${filePath}`);
Expand All @@ -40,20 +45,35 @@ export default class FileFactory {
}

/**
* Assuming we've already checked if the file path has a valid archive extension, assume that
* archive extension is accurate and parse the archive.
*
* This ordering should match {@link ROMIndexer#archiveEntryPriority}
*/
private static async entriesFrom(
private static async entriesFromArchiveExtension(
filePath: string,
checksumBitmask: number,
): Promise<ArchiveEntry<Archive>[]> {
let archive: Archive;
if (Zip.SUPPORTED_EXTENSIONS.some((ext) => filePath.toLowerCase().endsWith(ext))) {
if (Zip.SUPPORTED_FILES
.flatMap(([exts]) => exts)
.some((ext) => filePath.toLowerCase().endsWith(ext))
) {
archive = new Zip(filePath);
} else if (Tar.SUPPORTED_EXTENSIONS.some((ext) => filePath.toLowerCase().endsWith(ext))) {
} else if (Tar.SUPPORTED_FILES
.flatMap(([exts]) => exts)
.some((ext) => filePath.toLowerCase().endsWith(ext))
) {
archive = new Tar(filePath);
} else if (Rar.SUPPORTED_EXTENSIONS.some((ext) => filePath.toLowerCase().endsWith(ext))) {
} else if (Rar.SUPPORTED_FILES
.flatMap(([exts]) => exts)
.some((ext) => filePath.toLowerCase().endsWith(ext))
) {
archive = new Rar(filePath);
} else if (SevenZip.SUPPORTED_EXTENSIONS.some((ext) => filePath.toLowerCase().endsWith(ext))) {
} else if (SevenZip.SUPPORTED_FILES
.flatMap(([exts]) => exts)
.some((ext) => filePath.toLowerCase().endsWith(ext))
) {
archive = new SevenZip(filePath);
} else {
throw new Error(`unknown archive type: ${path.extname(filePath)}`);
Expand All @@ -62,12 +82,72 @@ export default class FileFactory {
return FileCache.getOrComputeEntries(archive, checksumBitmask);
}

static isArchive(filePath: string): boolean {
/**
 * Without knowing if the file is an archive or not, read its file signature, and if there is a
 * match then parse the archive.
 *
 * This ordering should match {@link ROMIndexer#archiveEntryPriority}
 *
 * @param filePath path of the file whose leading bytes should be inspected
 * @param checksumBitmask checksums to calculate, passed through to {@link FileCache}
 * @returns the entries of the detected archive, or `undefined` when the file could not be read
 * or none of the known signatures matched
 */
private static async entriesFromArchiveSignature(
  filePath: string,
  checksumBitmask: number,
): Promise<ArchiveEntry<Archive>[] | undefined> {
  // Candidates are checked in priority order, the first archive type with a matching signature
  // wins. Each table is flattened once here and reused for both sizing the read and matching.
  //
  // NOTE(review): 1F8B (gzip) is listed by both Tar and SevenZip, so a gzip-compressed file that
  // is not a tarball will still resolve to Tar because Tar is checked first - confirm that this
  // is intended.
  // NOTE(review): the POSIX "ustar" magic is stored at byte offset 257 of a tar header, so
  // comparing it against the start of the file presumably never matches an uncompressed tar -
  // confirm.
  const candidates: { signatures: Buffer[], create: () => Archive }[] = [
    {
      signatures: Zip.SUPPORTED_FILES.flatMap(([, signatures]) => signatures),
      create: () => new Zip(filePath),
    },
    {
      signatures: Tar.SUPPORTED_FILES.flatMap(([, signatures]) => signatures),
      create: () => new Tar(filePath),
    },
    {
      signatures: Rar.SUPPORTED_FILES.flatMap(([, signatures]) => signatures),
      create: () => new Rar(filePath),
    },
    {
      signatures: SevenZip.SUPPORTED_FILES.flatMap(([, signatures]) => signatures),
      create: () => new SevenZip(filePath),
    },
  ];

  const maxSignatureLengthBytes = candidates
    .flatMap(({ signatures }) => signatures)
    .reduce((max, signature) => Math.max(max, signature.length), 0);
  if (maxSignatureLengthBytes === 0) {
    // No signatures are known at all, there is nothing to compare against
    return undefined;
  }

  let fileSignature: Buffer;
  let stream: fs.ReadStream | undefined;
  try {
    // `end` is inclusive, so subtract one to read exactly the longest signature's length
    stream = fs.createReadStream(filePath, { end: maxSignatureLengthBytes - 1 });
    const signatureStream = stream;
    fileSignature = await new Promise<Buffer>((resolve, reject) => {
      const chunks: Buffer[] = [];
      signatureStream.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
      signatureStream.on('end', () => resolve(Buffer.concat(chunks)));
      signatureStream.on('error', reject);
    });
  } catch {
    // Fail silently on assumed I/O errors
    return undefined;
  } finally {
    // Release the file descriptor on both the success and the failure path
    stream?.destroy();
  }

  // A zero-length signature would "match" every file, so it is never treated as a match
  const matched = candidates.find(({ signatures }) => signatures.some((signature) => (
    signature.length > 0
    && fileSignature.subarray(0, signature.length).equals(signature)
  )));
  if (matched === undefined) {
    return undefined;
  }

  return FileCache.getOrComputeEntries(matched.create(), checksumBitmask);
}

static isExtensionArchive(filePath: string): boolean {
return [
...Zip.SUPPORTED_EXTENSIONS,
...Tar.SUPPORTED_EXTENSIONS,
...Rar.SUPPORTED_EXTENSIONS,
...SevenZip.SUPPORTED_EXTENSIONS,
...Zip.SUPPORTED_FILES.flatMap(([exts]) => exts),
...Tar.SUPPORTED_FILES.flatMap(([exts]) => exts),
...Rar.SUPPORTED_FILES.flatMap(([exts]) => exts),
...SevenZip.SUPPORTED_FILES.flatMap(([exts]) => exts),
].some((ext) => filePath.toLowerCase().endsWith(ext));
}
}
8 changes: 4 additions & 4 deletions src/types/outputFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ export default class OutputFactory {

if ((options.getDirGameSubdir() === GameSubdirMode.MULTIPLE
&& game.getRoms().length > 1
&& !FileFactory.isArchive(ext))
&& !FileFactory.isExtensionArchive(ext))
|| options.getDirGameSubdir() === GameSubdirMode.ALWAYS
) {
output = path.join(game.getName(), output);
Expand Down Expand Up @@ -495,9 +495,9 @@ export default class OutputFactory {

const romBasename = this.getRomBasename(game, rom, inputFile);

if (
!(inputFile instanceof ArchiveEntry || FileFactory.isArchive(inputFile.getFilePath()))
|| options.shouldExtract()
if (!(
inputFile instanceof ArchiveEntry || FileFactory.isExtensionArchive(inputFile.getFilePath())
) || options.shouldExtract()
) {
// Should extract (if needed), generate the file name from the ROM name
return romBasename;
Expand Down
2 changes: 1 addition & 1 deletion test/modules/patchScanner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ describe('multiple files', () => {

it('should scan multiple files of incorrect extensions', async () => {
const patchFiles = (await new Options({ patch: ['test/fixtures/patches/*'] }).scanPatchFilesWithoutExclusions())
.filter((filePath) => !FileFactory.isArchive(filePath));
.filter((filePath) => !FileFactory.isExtensionArchive(filePath));

const tempDir = await fsPoly.mkdtemp(Constants.GLOBAL_TEMP_DIR);
try {
Expand Down
8 changes: 4 additions & 4 deletions test/types/files/archives/archive.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ import FileFactory from '../../../../src/types/files/fileFactory.js';

describe('getArchiveEntries', () => {
test.each([...new Set([
...Zip.SUPPORTED_EXTENSIONS,
...Tar.SUPPORTED_EXTENSIONS,
...Rar.SUPPORTED_EXTENSIONS,
...SevenZip.SUPPORTED_EXTENSIONS,
...Zip.SUPPORTED_FILES.flatMap(([exts]) => exts),
...Tar.SUPPORTED_FILES.flatMap(([exts]) => exts),
...Rar.SUPPORTED_FILES.flatMap(([exts]) => exts),
...SevenZip.SUPPORTED_FILES.flatMap(([exts]) => exts),
])])('should throw when the file doesn\'t exist: %s', async (extension) => {
const tempFile = (await fsPoly.mktemp(path.join(Constants.GLOBAL_TEMP_DIR, 'file'))) + extension;
await expect(FileFactory.filesFrom(tempFile)).rejects.toThrow();
Expand Down
76 changes: 76 additions & 0 deletions test/types/files/fileFactory.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import path from 'node:path';

import Constants from '../../../src/constants.js';
import FsPoly from '../../../src/polyfill/fsPoly.js';
import ArchiveEntry from '../../../src/types/files/archives/archiveEntry.js';
import FileFactory from '../../../src/types/files/fileFactory.js';

// Exercises FileFactory.filesFrom() against the archive fixtures, once with their real file
// extensions and once copied to a temp path that doesn't keep the original file name.
describe('filesFrom', () => {
describe('archives', () => {
// Each row is [fixture path, expected number of archive entries]
test.each([
['test/fixtures/roms/7z/empty.7z', 0],
['test/fixtures/roms/7z/fizzbuzz.7z', 1],
['test/fixtures/roms/7z/foobar.7z', 1],
['test/fixtures/roms/7z/loremipsum.7z', 1],
['test/fixtures/roms/7z/onetwothree.7z', 3],
['test/fixtures/roms/7z/unknown.7z', 1],
['test/fixtures/roms/rar/fizzbuzz.rar', 1],
['test/fixtures/roms/rar/foobar.rar', 1],
['test/fixtures/roms/rar/loremipsum.rar', 1],
['test/fixtures/roms/rar/onetwothree.rar', 3],
['test/fixtures/roms/rar/unknown.rar', 1],
['test/fixtures/roms/tar/fizzbuzz.tar.gz', 1],
['test/fixtures/roms/tar/foobar.tar.gz', 1],
['test/fixtures/roms/tar/loremipsum.tar.gz', 1],
['test/fixtures/roms/tar/onetwothree.tar.gz', 3],
['test/fixtures/roms/tar/unknown.tar.gz', 1],
['test/fixtures/roms/zip/empty.zip', 0],
['test/fixtures/roms/zip/fizzbuzz.zip', 1],
['test/fixtures/roms/zip/foobar.zip', 1],
['test/fixtures/roms/zip/fourfive.zip', 2],
['test/fixtures/roms/zip/loremipsum.zip', 1],
['test/fixtures/roms/zip/onetwothree.zip', 3],
['test/fixtures/roms/zip/unknown.zip', 1],
])('should read the entries of archives with valid extensions: %s', async (filePath, expectedCount) => {
// The fixture is read in place, so its original extension is still on the path
const archiveEntries = await FileFactory.filesFrom(filePath);
// Every returned file must be an archive entry, never a plain file
expect(archiveEntries.every((archiveEntry) => archiveEntry instanceof ArchiveEntry))
.toEqual(true);
expect(archiveEntries).toHaveLength(expectedCount);
});

// Same [fixture path, expected entry count] rows as above, minus the two empty archives
// (empty.7z and empty.zip)
test.each([
['test/fixtures/roms/7z/fizzbuzz.7z', 1],
['test/fixtures/roms/7z/foobar.7z', 1],
['test/fixtures/roms/7z/loremipsum.7z', 1],
['test/fixtures/roms/7z/onetwothree.7z', 3],
['test/fixtures/roms/7z/unknown.7z', 1],
['test/fixtures/roms/rar/fizzbuzz.rar', 1],
['test/fixtures/roms/rar/foobar.rar', 1],
['test/fixtures/roms/rar/loremipsum.rar', 1],
['test/fixtures/roms/rar/onetwothree.rar', 3],
['test/fixtures/roms/rar/unknown.rar', 1],
['test/fixtures/roms/tar/fizzbuzz.tar.gz', 1],
['test/fixtures/roms/tar/foobar.tar.gz', 1],
['test/fixtures/roms/tar/loremipsum.tar.gz', 1],
['test/fixtures/roms/tar/onetwothree.tar.gz', 3],
['test/fixtures/roms/tar/unknown.tar.gz', 1],
['test/fixtures/roms/zip/fizzbuzz.zip', 1],
['test/fixtures/roms/zip/foobar.zip', 1],
['test/fixtures/roms/zip/fourfive.zip', 2],
['test/fixtures/roms/zip/loremipsum.zip', 1],
['test/fixtures/roms/zip/onetwothree.zip', 3],
['test/fixtures/roms/zip/unknown.zip', 1],
])('should read the entries of non-empty archives with junk extensions: %s', async (filePath, expectedCount) => {
// Copy the fixture to a temp path derived from the base name 'file'; presumably the generated
// name has no recognized archive extension, so the archive has to be detected some other way
// than by its extension - confirm against FsPoly.mktemp
const tempFile = await FsPoly.mktemp(path.join(Constants.GLOBAL_TEMP_DIR, 'file'));
await FsPoly.copyFile(filePath, tempFile);
try {
const archiveEntries = await FileFactory.filesFrom(tempFile);
expect(archiveEntries.every((archiveEntry) => archiveEntry instanceof ArchiveEntry))
.toEqual(true);
expect(archiveEntries).toHaveLength(expectedCount);
} finally {
// Always remove the temp copy, even when an expectation above fails
await FsPoly.rm(tempFile, { force: true });
}
});
});
});

0 comments on commit dbd343d

Please sign in to comment.