Skip to content

Commit

Permalink
Feature: cache ROM header detection (#1235)
Browse files Browse the repository at this point in the history
  • Loading branch information
emmercm authored Jul 26, 2024
1 parent e7c43e5 commit 5d27f2b
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 223 deletions.
24 changes: 6 additions & 18 deletions src/modules/romHeaderProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import ProgressBar, { ProgressBarSymbol } from '../console/progressBar.js';
import DriveSemaphore from '../driveSemaphore.js';
import ArchiveEntry from '../types/files/archives/archiveEntry.js';
import File from '../types/files/file.js';
import FileCache from '../types/files/fileCache.js';
import ROMHeader from '../types/files/romHeader.js';
import Options from '../types/options.js';
import Module from './module.js';
Expand Down Expand Up @@ -72,30 +73,17 @@ export default class ROMHeaderProcessor extends Module {
return inputFile;
}

// Can get FileHeader from extension, use that
const headerForFilename = ROMHeader.headerFromFilename(inputFile.getExtractedFilePath());
if (headerForFilename) {
this.progressBar.logTrace(`${inputFile.toString()}: reading potentially headered file by filename: ${headerForFilename.getHeaderedFileExtension()}`);
const fileWithHeader = await inputFile.withFileHeader(headerForFilename);
if (fileWithHeader.getFileHeader()) {
this.progressBar.logTrace(`${inputFile.toString()}: found header by filename: ${headerForFilename.getHeaderedFileExtension()}`);
} else {
this.progressBar.logTrace(`${inputFile.toString()}: found non-applicable header by filename: ${headerForFilename.getHeaderedFileExtension()}`);
}
return fileWithHeader;
}

// Should get FileHeader from File, try to
if (this.options.shouldReadFileForHeader(inputFile.getExtractedFilePath())) {
if (ROMHeader.headerFromFilename(inputFile.getExtractedFilePath()) !== undefined
|| this.options.shouldReadFileForHeader(inputFile.getExtractedFilePath())
) {
this.progressBar.logTrace(`${inputFile.toString()}: reading potentially headered file by file contents`);
const headerForFileStream = await inputFile.createReadStream(
async (stream) => ROMHeader.headerFromFileStream(stream),
);
const headerForFileStream = await FileCache.getOrComputeFileHeader(inputFile);
if (headerForFileStream) {
this.progressBar.logTrace(`${inputFile.toString()}: found header by file contents: ${headerForFileStream.getHeaderedFileExtension()}`);
return inputFile.withFileHeader(headerForFileStream);
}
this.progressBar.logWarn(`${inputFile.toString()}: didn't find header by file contents`);
this.progressBar.logTrace(`${inputFile.toString()}: didn't find header by file contents`);
}

// Should not get FileHeader
Expand Down
7 changes: 1 addition & 6 deletions src/types/files/archives/archiveEntry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,9 @@ export default class ArchiveEntry<A extends Archive> extends File implements Arc
}

async withFileHeader(fileHeader: ROMHeader): Promise<ArchiveEntry<A>> {
// Make sure the file actually has the right file signature
const hasHeader = await this.createReadStream(
async (stream) => fileHeader.fileHasHeader(stream),
);
if (!hasHeader) {
if (fileHeader === this.fileHeader) {
return this;
}

return ArchiveEntry.entryOf({
...this,
fileHeader,
Expand Down
7 changes: 1 addition & 6 deletions src/types/files/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -444,14 +444,9 @@ export default class File implements FileProps {
}

async withFileHeader(fileHeader: ROMHeader): Promise<File> {
// Make sure the file actually has the right file signature
const hasHeader = await this.createReadStream(
async (stream) => fileHeader.fileHasHeader(stream),
);
if (!hasHeader) {
if (fileHeader === this.fileHeader) {
return this;
}

return File.fileOf({
...this,
fileHeader,
Expand Down
50 changes: 43 additions & 7 deletions src/types/files/fileCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@ import Archive from './archives/archive.js';
import ArchiveEntry, { ArchiveEntryProps } from './archives/archiveEntry.js';
import File, { FileProps } from './file.js';
import { ChecksumBitmask } from './fileChecksums.js';
import ROMHeader from './romHeader.js';

/**
 * Envelope stored in the file cache for a single cache key.
 */
interface CacheValue {
// Size of the file at the time the value was cached; compared against a fresh stat to detect changes
fileSize: number,
// Modified time (milliseconds) of the file at the time the value was cached; also compared against a fresh stat
modifiedTimeMillis: number,
// NOTE(review): `value` is declared twice below with different types. This looks like the
// before/after lines of a diff rendered together; confirm only the wider declaration is kept.
value: FileProps | ArchiveEntryProps<Archive>[],
// The string form holds a ROM header name (see getOrComputeFileHeader); undefined is stored when no header was detected
value: FileProps | ArchiveEntryProps<Archive>[] | string | undefined,
}

/**
 * Single-character tags identifying what kind of value is stored under a cache key.
 * The tag is the last `|`-separated segment of the key built by getCacheKey().
 */
enum ValueType {
// NOTE(review): FILE/FILE_CHECKSUMS and ARCHIVE_ENTRIES/ARCHIVE_CHECKSUMS share the same tags.
// This looks like old and new member names of a rename shown together; confirm which set is intended.
FILE = 'F',
ARCHIVE_ENTRIES = 'A',
FILE_CHECKSUMS = 'F',
ARCHIVE_CHECKSUMS = 'A',
// Tag used by getOrComputeFileHeader() for cached ROM header names
FILE_HEADER = 'H',
}

export default class FileCache {
Expand Down Expand Up @@ -43,6 +45,7 @@ export default class FileCache {
const keyRegex = new RegExp(`^V${prevVersion}\\|`);
return this.cache.delete(keyRegex);
}));
// await this.cache.delete(new RegExp(`\\|[^${Object.values(ValueType).join()}]$`));

// Delete keys for deleted files
const disks = FsPoly.disksSync();
Expand Down Expand Up @@ -71,7 +74,7 @@ export default class FileCache {
await this.cache.save();
}

static async getOrComputeFile(
static async getOrComputeFileChecksums(
filePath: string,
checksumBitmask: number,
): Promise<File> {
Expand All @@ -81,7 +84,7 @@ export default class FileCache {

// NOTE(cemmer): we're explicitly not catching ENOENT errors here, we want it to bubble up
const stats = await FsPoly.stat(filePath);
const cacheKey = this.getCacheKey(filePath, ValueType.FILE);
const cacheKey = this.getCacheKey(filePath, ValueType.FILE_CHECKSUMS);

// NOTE(cemmer): we're using the cache as a mutex here, so even if this function is called
// multiple times concurrently, entries will only be fetched once.
Expand Down Expand Up @@ -131,7 +134,7 @@ export default class FileCache {
});
}

static async getOrComputeEntries<T extends Archive>(
static async getOrComputeArchiveChecksums<T extends Archive>(
archive: T,
checksumBitmask: number,
): Promise<ArchiveEntry<Archive>[]> {
Expand All @@ -141,7 +144,7 @@ export default class FileCache {

// NOTE(cemmer): we're explicitly not catching ENOENT errors here, we want it to bubble up
const stats = await FsPoly.stat(archive.getFilePath());
const cacheKey = this.getCacheKey(archive.getFilePath(), ValueType.ARCHIVE_ENTRIES);
const cacheKey = this.getCacheKey(archive.getFilePath(), ValueType.ARCHIVE_CHECKSUMS);

// NOTE(cemmer): we're using the cache as a mutex here, so even if this function is called
// multiple times concurrently, entries will only be fetched once.
Expand Down Expand Up @@ -192,6 +195,39 @@ export default class FileCache {
})));
}

/**
 * Resolve the ROM header detected in `file`'s contents, going through the cache.
 *
 * Only the header's name is cached, together with the file's size and modified time;
 * a cached entry whose size or modified time no longer matches the file on disk is
 * reported as changed to the cache.
 *
 * @param file the file to detect a header for
 * @returns the detected header, or `undefined` when no header name was found
 */
static async getOrComputeFileHeader(file: File): Promise<ROMHeader | undefined> {
  // NOTE(cemmer): we're explicitly not catching ENOENT errors here, we want it to bubble up
  const { size: currentSize, mtimeMs: currentMtimeMs } = await FsPoly.stat(file.getFilePath());
  const headerCacheKey = this.getCacheKey(file.toString(), ValueType.FILE_HEADER);

  const { value } = await this.cache.getOrCompute(
    headerCacheKey,
    // Compute callback: detect the header from the file's contents
    async () => {
      const detectedHeader = await file.createReadStream(
        async (stream) => ROMHeader.headerFromFileStream(stream),
      );
      return {
        fileSize: currentSize,
        modifiedTimeMillis: currentMtimeMs,
        value: detectedHeader?.getName(),
      };
    },
    // Returns true when the file has changed since being cached
    (cached) => cached.fileSize !== currentSize || cached.modifiedTimeMillis !== currentMtimeMs,
  );

  // Map the cached name back to its header; an absent/empty name means no header
  const headerName = value as string | undefined;
  return headerName ? ROMHeader.headerFromName(headerName) : undefined;
}

/**
 * Build the cache key for a path and value type, in the form `V<version>|<filePath>|<valueType>`.
 */
private static getCacheKey(filePath: string, valueType: ValueType): string {
  const versionPrefix = `V${FileCache.VERSION}`;
  return [versionPrefix, filePath, valueType].join('|');
}
Expand Down
4 changes: 2 additions & 2 deletions src/types/files/fileFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ export default class FileFactory {
filePath: string,
checksumBitmask: number,
): Promise<File> {
return FileCache.getOrComputeFile(filePath, checksumBitmask);
return FileCache.getOrComputeFileChecksums(filePath, checksumBitmask);
}

public static async archiveFileFrom(
Expand Down Expand Up @@ -94,7 +94,7 @@ export default class FileFactory {
return undefined;
}

return FileCache.getOrComputeEntries(archive, checksumBitmask);
return FileCache.getOrComputeArchiveChecksums(archive, checksumBitmask);
}

/**
Expand Down
21 changes: 12 additions & 9 deletions src/types/files/romHeader.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import path from 'node:path';
import { Readable } from 'node:stream';

import { Memoize } from 'typescript-memoize';

import ArrayPoly from '../../polyfill/arrayPoly.js';

export default class ROMHeader {
Expand Down Expand Up @@ -62,6 +64,10 @@ export default class ROMHeader {
.sort();
}

/**
 * Look up a known header by the key it is registered under in `HEADERS`.
 *
 * Only the registry's own keys are consulted, so a name that merely collides with an
 * inherited `Object.prototype` member (e.g. `constructor`, `toString`) resolves to
 * `undefined` instead of a non-header value typed as a header.
 *
 * @param name the registry key of the header, as returned by getName()
 * @returns the matching header, or `undefined` if the name is not registered
 */
static headerFromName(name: string): ROMHeader | undefined {
  if (!Object.prototype.hasOwnProperty.call(this.HEADERS, name)) {
    return undefined;
  }
  return this.HEADERS[name];
}

static headerFromFilename(filePath: string): ROMHeader | undefined {
const headers = Object.values(this.HEADERS);
for (const header of headers) {
Expand Down Expand Up @@ -123,6 +129,12 @@ export default class ROMHeader {
return undefined;
}

/**
 * Reverse lookup of the key this header instance is registered under in `ROMHeader.HEADERS`.
 * The result is memoized.
 *
 * NOTE(review): the return is asserted to be a string; an instance that is not present in
 * `ROMHeader.HEADERS` would yield `undefined` at runtime.
 */
@Memoize()
getName(): string {
  const registered = Object.entries(ROMHeader.HEADERS)
    .find(([, candidate]) => candidate === this);
  return registered?.[0] as string;
}

/**
 * @returns this header's `dataOffsetBytes` value
 */
getDataOffsetBytes(): number {
return this.dataOffsetBytes;
}
Expand All @@ -134,13 +146,4 @@ export default class ROMHeader {
/**
 * @returns this header's `headerlessFileExtension` value
 */
getHeaderlessFileExtension(): string {
return this.headerlessFileExtension;
}

/**
 * Check whether the given stream carries this header's signature.
 *
 * The bytes read via `ROMHeader.readHeaderHex()` are compared case-insensitively
 * against `headerValue`.
 *
 * @param stream readable stream of the file's contents
 * @returns true if the value read from the stream equals this header's value
 */
async fileHasHeader(stream: Readable): Promise<boolean> {
  const signatureStart = this.headerOffsetBytes;
  // Presumably headerValue is a hex string (two characters per byte), hence the halved length
  const signatureEnd = signatureStart + this.headerValue.length / 2;

  const actualHex = await ROMHeader.readHeaderHex(stream, signatureStart, signatureEnd);
  const expectedHex = this.headerValue.toUpperCase();
  return actualHex.toUpperCase() === expectedHex;
}
}
109 changes: 0 additions & 109 deletions test/types/files/archives/archiveEntry.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,34 +179,6 @@ describe('getCrc32WithoutHeader', () => {
expect(archiveEntry.getSha256WithoutHeader()).toBeUndefined();
});

// Each row is [archive path, expected CRC32]. After attaching the header implied by the
// entry's filename, the CRC32 with and without header must both equal the expected value,
// and no other checksum may be defined.
test.each([
['./test/fixtures/roms/7z/fizzbuzz.7z', '370517b5'],
['./test/fixtures/roms/gz/fizzbuzz.gz', '370517b5'],
['./test/fixtures/roms/rar/fizzbuzz.rar', '370517b5'],
['./test/fixtures/roms/tar/fizzbuzz.tar.gz', '370517b5'],
['./test/fixtures/roms/zip/fizzbuzz.zip', '370517b5'],
['./test/fixtures/roms/7z/foobar.7z', 'b22c9747'],
['./test/fixtures/roms/gz/foobar.gz', 'b22c9747'],
['./test/fixtures/roms/rar/foobar.rar', 'b22c9747'],
['./test/fixtures/roms/tar/foobar.tar.gz', 'b22c9747'],
['./test/fixtures/roms/zip/foobar.zip', 'b22c9747'],
])('should hash the full archive entry when header is given but not present in file: %s', async (filePath, expectedCrc) => {
const archiveEntries = await FileFactory.filesFrom(filePath);
expect(archiveEntries).toHaveLength(1);
const archiveEntry = await archiveEntries[0].withFileHeader(
ROMHeader.headerFromFilename(archiveEntries[0].getExtractedFilePath()) as ROMHeader,
);

expect(archiveEntry.getCrc32()).toEqual(expectedCrc);
expect(archiveEntry.getCrc32WithoutHeader()).toEqual(expectedCrc);
expect(archiveEntry.getMd5()).toBeUndefined();
expect(archiveEntry.getMd5WithoutHeader()).toBeUndefined();
expect(archiveEntry.getSha1()).toBeUndefined();
expect(archiveEntry.getSha1WithoutHeader()).toBeUndefined();
expect(archiveEntry.getSha256()).toBeUndefined();
expect(archiveEntry.getSha256WithoutHeader()).toBeUndefined();
});

test.each([
['./test/fixtures/roms/headered/diagnostic_test_cartridge.a78.7z', 'a1eaa7c1'],
['./test/fixtures/roms/headered/fds_joypad_test.fds.zip', '3ecbac61'],
Expand Down Expand Up @@ -298,33 +270,6 @@ describe('getMd5WithoutHeader', () => {
expect(archiveEntry.getSha256WithoutHeader()).toBeUndefined();
});

// Each row is [archive path, expected MD5]. Files are scanned with only the MD5 checksum
// requested; after attaching the header implied by the entry's filename, the MD5 with and
// without header must both equal the expected value, and SHA1/SHA256 must stay undefined.
test.each([
['./test/fixtures/roms/7z/fizzbuzz.7z', 'cbe8410861130a91609295349918c2c2'],
['./test/fixtures/roms/gz/fizzbuzz.gz', 'cbe8410861130a91609295349918c2c2'],
['./test/fixtures/roms/rar/fizzbuzz.rar', 'cbe8410861130a91609295349918c2c2'],
['./test/fixtures/roms/tar/fizzbuzz.tar.gz', 'cbe8410861130a91609295349918c2c2'],
['./test/fixtures/roms/zip/fizzbuzz.zip', 'cbe8410861130a91609295349918c2c2'],
['./test/fixtures/roms/7z/foobar.7z', '14758f1afd44c09b7992073ccf00b43d'],
['./test/fixtures/roms/gz/foobar.gz', '14758f1afd44c09b7992073ccf00b43d'],
['./test/fixtures/roms/rar/foobar.rar', '14758f1afd44c09b7992073ccf00b43d'],
['./test/fixtures/roms/tar/foobar.tar.gz', '14758f1afd44c09b7992073ccf00b43d'],
['./test/fixtures/roms/zip/foobar.zip', '14758f1afd44c09b7992073ccf00b43d'],
])('should hash the full archive entry when header is given but not present in file: %s', async (filePath, expectedMd5) => {
const archiveEntries = await FileFactory.filesFrom(filePath, ChecksumBitmask.MD5);
expect(archiveEntries).toHaveLength(1);
const archiveEntry = await archiveEntries[0].withFileHeader(
ROMHeader.headerFromFilename(archiveEntries[0].getExtractedFilePath()) as ROMHeader,
);

// Some archives store CRC32, or otherwise it won't be defined
expect(archiveEntry.getMd5()).toEqual(expectedMd5);
expect(archiveEntry.getMd5WithoutHeader()).toEqual(expectedMd5);
expect(archiveEntry.getSha1()).toBeUndefined();
expect(archiveEntry.getSha1WithoutHeader()).toBeUndefined();
expect(archiveEntry.getSha256()).toBeUndefined();
expect(archiveEntry.getSha256WithoutHeader()).toBeUndefined();
});

test.each([
['./test/fixtures/roms/headered/diagnostic_test_cartridge.a78.7z', '91041aadd1700a7a4076f4005f2c362f'],
['./test/fixtures/roms/headered/fds_joypad_test.fds.zip', '26df56a7e5b096577338bcc4c334ec7d'],
Expand Down Expand Up @@ -416,33 +361,6 @@ describe('getSha1WithoutHeader', () => {
expect(archiveEntry.getSha256WithoutHeader()).toBeUndefined();
});

// Each row is [archive path, expected SHA1]. Files are scanned with only the SHA1 checksum
// requested; after attaching the header implied by the entry's filename, the SHA1 with and
// without header must both equal the expected value, and MD5/SHA256 must stay undefined.
test.each([
['./test/fixtures/roms/7z/fizzbuzz.7z', '5a316d9f0e06964d94cdd62a933803d7147ddadb'],
['./test/fixtures/roms/gz/fizzbuzz.gz', '5a316d9f0e06964d94cdd62a933803d7147ddadb'],
['./test/fixtures/roms/rar/fizzbuzz.rar', '5a316d9f0e06964d94cdd62a933803d7147ddadb'],
['./test/fixtures/roms/tar/fizzbuzz.tar.gz', '5a316d9f0e06964d94cdd62a933803d7147ddadb'],
['./test/fixtures/roms/zip/fizzbuzz.zip', '5a316d9f0e06964d94cdd62a933803d7147ddadb'],
['./test/fixtures/roms/7z/foobar.7z', '988881adc9fc3655077dc2d4d757d480b5ea0e11'],
['./test/fixtures/roms/gz/foobar.gz', '988881adc9fc3655077dc2d4d757d480b5ea0e11'],
['./test/fixtures/roms/rar/foobar.rar', '988881adc9fc3655077dc2d4d757d480b5ea0e11'],
['./test/fixtures/roms/tar/foobar.tar.gz', '988881adc9fc3655077dc2d4d757d480b5ea0e11'],
['./test/fixtures/roms/zip/foobar.zip', '988881adc9fc3655077dc2d4d757d480b5ea0e11'],
])('should hash the full archive entry when header is given but not present in file: %s', async (filePath, expectedSha1) => {
const archiveEntries = await FileFactory.filesFrom(filePath, ChecksumBitmask.SHA1);
expect(archiveEntries).toHaveLength(1);
const archiveEntry = await archiveEntries[0].withFileHeader(
ROMHeader.headerFromFilename(archiveEntries[0].getExtractedFilePath()) as ROMHeader,
);

// Some archives store CRC32, or otherwise it won't be defined
expect(archiveEntry.getMd5()).toBeUndefined();
expect(archiveEntry.getMd5WithoutHeader()).toBeUndefined();
expect(archiveEntry.getSha1()).toEqual(expectedSha1);
expect(archiveEntry.getSha1WithoutHeader()).toEqual(expectedSha1);
expect(archiveEntry.getSha256()).toBeUndefined();
expect(archiveEntry.getSha256WithoutHeader()).toBeUndefined();
});

test.each([
['./test/fixtures/roms/headered/diagnostic_test_cartridge.a78.7z', '76ec76c423d88bdf739e673c051c5b9c174881c6'],
['./test/fixtures/roms/headered/fds_joypad_test.fds.zip', '7b6bd1a69bbc5d8121c72dd1eedfb6752fe11787'],
Expand Down Expand Up @@ -534,33 +452,6 @@ describe('getSha256WithoutHeader', () => {
expect(archiveEntry.getSha256WithoutHeader()).toEqual(expectedSha256);
});

// Each row is [archive path, expected SHA256]. Files are scanned with only the SHA256 checksum
// requested; after attaching the header implied by the entry's filename, the SHA256 with and
// without header must both equal the expected value, and MD5/SHA1 must stay undefined.
test.each([
['./test/fixtures/roms/7z/fizzbuzz.7z', '6e809804766eaa4dd42a2607b789f3e4e5d32fc321ba8dd3ef39ddc1ea2888e9'],
['./test/fixtures/roms/gz/fizzbuzz.gz', '6e809804766eaa4dd42a2607b789f3e4e5d32fc321ba8dd3ef39ddc1ea2888e9'],
['./test/fixtures/roms/rar/fizzbuzz.rar', '6e809804766eaa4dd42a2607b789f3e4e5d32fc321ba8dd3ef39ddc1ea2888e9'],
['./test/fixtures/roms/tar/fizzbuzz.tar.gz', '6e809804766eaa4dd42a2607b789f3e4e5d32fc321ba8dd3ef39ddc1ea2888e9'],
['./test/fixtures/roms/zip/fizzbuzz.zip', '6e809804766eaa4dd42a2607b789f3e4e5d32fc321ba8dd3ef39ddc1ea2888e9'],
['./test/fixtures/roms/7z/foobar.7z', 'aec070645fe53ee3b3763059376134f058cc337247c978add178b6ccdfb0019f'],
['./test/fixtures/roms/gz/foobar.gz', 'aec070645fe53ee3b3763059376134f058cc337247c978add178b6ccdfb0019f'],
['./test/fixtures/roms/rar/foobar.rar', 'aec070645fe53ee3b3763059376134f058cc337247c978add178b6ccdfb0019f'],
['./test/fixtures/roms/tar/foobar.tar.gz', 'aec070645fe53ee3b3763059376134f058cc337247c978add178b6ccdfb0019f'],
['./test/fixtures/roms/zip/foobar.zip', 'aec070645fe53ee3b3763059376134f058cc337247c978add178b6ccdfb0019f'],
])('should hash the full archive entry when header is given but not present in file: %s', async (filePath, expectedSha256) => {
const archiveEntries = await FileFactory.filesFrom(filePath, ChecksumBitmask.SHA256);
expect(archiveEntries).toHaveLength(1);
const archiveEntry = await archiveEntries[0].withFileHeader(
ROMHeader.headerFromFilename(archiveEntries[0].getExtractedFilePath()) as ROMHeader,
);

// Some archives store CRC32, or otherwise it won't be defined
expect(archiveEntry.getMd5()).toBeUndefined();
expect(archiveEntry.getMd5WithoutHeader()).toBeUndefined();
expect(archiveEntry.getSha1()).toBeUndefined();
expect(archiveEntry.getSha1WithoutHeader()).toBeUndefined();
expect(archiveEntry.getSha256()).toEqual(expectedSha256);
expect(archiveEntry.getSha256WithoutHeader()).toEqual(expectedSha256);
});

test.each([
['./test/fixtures/roms/headered/diagnostic_test_cartridge.a78.7z', '248faac52d828b3542b74ff478e87afc6748949ad0f294fe75e6be94966a7558'],
['./test/fixtures/roms/headered/fds_joypad_test.fds.zip', '29e56794d15ccaa79e48ec0c80004f8745cfb116cce43b99435ae8790e79c327'],
Expand Down
Loading

0 comments on commit 5d27f2b

Please sign in to comment.