Skip to content

Commit

Permalink
Refactor: defer ArchiveFile checksum calculation until after 1G1R app…
Browse files Browse the repository at this point in the history
…lied (#1162)
  • Loading branch information
emmercm authored Jun 13, 2024
1 parent 01d2138 commit e0c40bb
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 15 deletions.
1 change: 1 addition & 0 deletions src/console/progressBar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export const ProgressBarSymbol = {
// Candidates
GENERATING: chalk.cyan('Σ'),
FILTERING: chalk.cyan('∆'),
HASHING: chalk.cyan('#'),
VALIDATING: chalk.cyan(process.platform === 'win32' ? '?' : '≟'),
COMBINING_ALL: chalk.cyan(process.platform === 'win32' ? 'U' : '∪'),
WRITING: chalk.yellow(process.platform === 'win32' ? '»' : '✎'),
Expand Down
9 changes: 7 additions & 2 deletions src/igir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import Logger from './console/logger.js';
import ProgressBar, { ProgressBarSymbol } from './console/progressBar.js';
import ProgressBarCLI from './console/progressBarCli.js';
import Constants from './constants.js';
import CandidateArchiveFileHasher from './modules/candidateArchiveFileHasher.js';
import CandidateCombiner from './modules/candidateCombiner.js';
import CandidateGenerator from './modules/candidateGenerator.js';
import CandidateMergeSplitValidator from './modules/candidateMergeSplitValidator.js';
Expand Down Expand Up @@ -355,10 +356,14 @@ export default class Igir {

const preferredCandidates = await new CandidatePreferer(this.options, progressBar)
.prefer(dat, patchedCandidates);
// TODO(cemmer): calculate raw checksums for archives after applying 1G1R rules

// Delay calculating checksums for {@link ArchiveFile}s until after {@link CandidatePreferer}
// for efficiency
const hashedCandidates = await new CandidateArchiveFileHasher(this.options, progressBar)
.hash(dat, preferredCandidates);

const postProcessedCandidates = await new CandidatePostProcessor(this.options, progressBar)
.process(dat, preferredCandidates);
.process(dat, hashedCandidates);

await new CandidateMergeSplitValidator(this.options, progressBar)
.validate(dat, postProcessedCandidates);
Expand Down
124 changes: 124 additions & 0 deletions src/modules/candidateArchiveFileHasher.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import { Semaphore } from 'async-mutex';

import ProgressBar, { ProgressBarSymbol } from '../console/progressBar.js';
import DAT from '../types/dats/dat.js';
import Parent from '../types/dats/parent.js';
import ArchiveFile from '../types/files/archives/archiveFile.js';
import FileFactory from '../types/files/fileFactory.js';
import Options from '../types/options.js';
import ReleaseCandidate from '../types/releaseCandidate.js';
import ROMWithFiles from '../types/romWithFiles.js';
import Module from './module.js';

/**
 * Calculate checksums for {@link ArchiveFile}s (which were skipped in {@link CandidateGenerator}).
 * This deferral is done to prevent calculating checksums for files that will be filtered out by
 * {@link CandidatePreferer}.
 */
export default class CandidateArchiveFileHasher extends Module {
  // Shared by every instance so that the number of archives being hashed at once is bounded
  // globally; it starts effectively unbounded and is lowered by the constructor
  private static readonly THREAD_SEMAPHORE = new Semaphore(Number.MAX_SAFE_INTEGER);

  private readonly options: Options;

  constructor(options: Options, progressBar: ProgressBar) {
    super(progressBar, CandidateArchiveFileHasher.name);
    this.options = options;

    // This will be the same value globally, but we can't know the value at file import time
    if (options.getReaderThreads() < CandidateArchiveFileHasher.THREAD_SEMAPHORE.getValue()) {
      CandidateArchiveFileHasher.THREAD_SEMAPHORE.setValue(options.getReaderThreads());
    }
  }

  /**
   * Hash the {@link ArchiveFile}s.
   *
   * The input map is returned as-is when there are no parents, when neither testing nor
   * overwriting invalid files is enabled, or when no candidate uses an {@link ArchiveFile} as its
   * input. Otherwise a new map is returned in which every {@link ArchiveFile} input has been
   * replaced by a freshly hashed one.
   * @param dat the DAT the candidates belong to, used only for log messages here
   * @param parentsToCandidates the candidates to hash, grouped by parent
   * @returns the candidates, grouped by the same parents
   */
  async hash(
    dat: DAT,
    parentsToCandidates: Map<Parent, ReleaseCandidate[]>,
  ): Promise<Map<Parent, ReleaseCandidate[]>> {
    if (parentsToCandidates.size === 0) {
      this.progressBar.logTrace(`${dat.getNameShort()}: no parents to hash ArchiveFiles for`);
      return parentsToCandidates;
    }

    if (!this.options.shouldTest() && !this.options.getOverwriteInvalid()) {
      this.progressBar.logTrace(`${dat.getNameShort()}: not testing or overwriting invalid files, no need`);
      return parentsToCandidates;
    }

    const archiveFileCount = [...parentsToCandidates.values()]
      .flat()
      .flatMap((candidate) => candidate.getRomsWithFiles())
      .filter((romWithFiles) => romWithFiles.getInputFile() instanceof ArchiveFile)
      .length;
    if (archiveFileCount === 0) {
      this.progressBar.logTrace(`${dat.getNameShort()}: no ArchiveFiles to hash`);
      return parentsToCandidates;
    }

    this.progressBar.logTrace(`${dat.getNameShort()}: generating ${archiveFileCount.toLocaleString()} hashed ArchiveFile candidate${archiveFileCount !== 1 ? 's' : ''}`);
    await this.progressBar.setSymbol(ProgressBarSymbol.HASHING);
    await this.progressBar.reset(archiveFileCount);

    // Await here so that the "done" trace below is only logged once hashing has actually finished
    const hashedParentsToCandidates = await this.hashArchiveFiles(parentsToCandidates);

    this.progressBar.logTrace(`${dat.getNameShort()}: done generating hashed ArchiveFile candidates`);
    return hashedParentsToCandidates;
  }

  /**
   * Rebuild the parent-to-candidates map, replacing every {@link ROMWithFiles} whose input file is
   * an {@link ArchiveFile} with a hashed equivalent. Every promise is created up front and the
   * class-wide semaphore bounds how many archives are hashed at the same time.
   */
  private async hashArchiveFiles(
    parentsToCandidates: Map<Parent, ReleaseCandidate[]>,
  ): Promise<Map<Parent, ReleaseCandidate[]>> {
    const hashedEntries = await Promise.all([...parentsToCandidates.entries()]
      .map(async ([parent, releaseCandidates]): Promise<[Parent, ReleaseCandidate[]]> => {
        const hashedReleaseCandidates = await Promise.all(releaseCandidates
          .map(async (releaseCandidate) => {
            const hashedRomsWithFiles = await Promise.all(releaseCandidate.getRomsWithFiles()
              .map(async (romWithFiles) => this.hashRomWithFiles(romWithFiles)));

            return new ReleaseCandidate(
              releaseCandidate.getGame(),
              releaseCandidate.getRelease(),
              hashedRomsWithFiles,
            );
          }));

        return [parent, hashedReleaseCandidates];
      }));

    return new Map(hashedEntries);
  }

  /**
   * Hash a single {@link ROMWithFiles} if (and only if) its input file is an {@link ArchiveFile},
   * reporting progress and a waiting message on the progress bar while doing so. Any other kind
   * of input file is returned untouched.
   */
  private async hashRomWithFiles(romWithFiles: ROMWithFiles): Promise<ROMWithFiles> {
    const inputFile = romWithFiles.getInputFile();
    if (!(inputFile instanceof ArchiveFile)) {
      return romWithFiles;
    }

    return CandidateArchiveFileHasher.THREAD_SEMAPHORE.runExclusive(async () => {
      await this.progressBar.incrementProgress();
      const waitingMessage = `${inputFile.toString()} ...`;
      this.progressBar.addWaitingMessage(waitingMessage);

      let hashedRomWithFiles: ROMWithFiles;
      try {
        hashedRomWithFiles = await CandidateArchiveFileHasher.withHashedArchiveFile(
          romWithFiles,
          inputFile,
        );
      } finally {
        // Remove the exact message that was added above, even if hashing failed, so that it
        // doesn't linger on the progress bar
        this.progressBar.removeWaitingMessage(waitingMessage);
      }

      await this.progressBar.incrementDone();
      return hashedRomWithFiles;
    });
  }

  /**
   * Calculate the checksums of the given {@link ArchiveFile} and return a copy of the
   * {@link ROMWithFiles} that uses the hashed file as its input, with the same size and checksums
   * applied to its output file.
   */
  private static async withHashedArchiveFile(
    romWithFiles: ROMWithFiles,
    inputFile: ArchiveFile,
  ): Promise<ROMWithFiles> {
    const hashedInputFile = await FileFactory.archiveFileFrom(
      inputFile.getArchive(),
      inputFile.getChecksumBitmask(),
    );
    // {@link CandidateGenerator} would have copied undefined values from the input
    // file, so we need to modify the expected output file as well for testing
    const hashedOutputFile = romWithFiles.getOutputFile().withProps({
      size: hashedInputFile.getSize(),
      crc32: hashedInputFile.getCrc32(),
      md5: hashedInputFile.getMd5(),
      sha1: hashedInputFile.getSha1(),
      sha256: hashedInputFile.getSha256(),
    });
    return new ROMWithFiles(
      romWithFiles.getRom(),
      hashedInputFile,
      hashedOutputFile,
    );
  }
}
15 changes: 5 additions & 10 deletions src/modules/candidateGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ import Release from '../types/dats/release.js';
import ROM from '../types/dats/rom.js';
import Archive from '../types/files/archives/archive.js';
import ArchiveEntry from '../types/files/archives/archiveEntry.js';
import ArchiveFile from '../types/files/archives/archiveFile.js';
import Zip from '../types/files/archives/zip.js';
import File from '../types/files/file.js';
import { ChecksumBitmask } from '../types/files/fileChecksums.js';
import FileFactory from '../types/files/fileFactory.js';
import IndexedFiles from '../types/indexedFiles.js';
import Options from '../types/options.js';
import OutputFactory, { OutputPath } from '../types/outputFactory.js';
Expand All @@ -26,8 +25,6 @@ import Module from './module.js';
/**
* For every {@link Parent} in the {@link DAT}, look for its {@link ROM}s in the scanned ROM list,
* and return a set of candidate files.
*
* This class may be run concurrently with other classes.
*/
export default class CandidateGenerator extends Module {
private static readonly THREAD_SEMAPHORE = new Semaphore(Number.MAX_SAFE_INTEGER);
Expand Down Expand Up @@ -185,13 +182,11 @@ export default class CandidateGenerator extends Module {
&& !this.options.shouldExtract()
) {
try {
inputFile = await FileFactory.archiveFileFrom(
// Note: we're delaying checksum calculation for now, {@link CandidateArchiveFileHasher}
// will handle it later
inputFile = new ArchiveFile(
inputFile.getArchive(),
// If we're testing, then we need to calculate the archive's checksums, otherwise we
// can skip calculating checksums for efficiency
this.options.shouldTest() || this.options.getOverwriteInvalid()
? inputFile.getChecksumBitmask()
: ChecksumBitmask.NONE,
{ checksumBitmask: inputFile.getChecksumBitmask() },
);
} catch (error) {
this.progressBar.logWarn(`${dat.getNameShort()}: ${game.getName()}: ${error}`);
Expand Down
7 changes: 5 additions & 2 deletions src/types/files/archives/archiveFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ import Archive from './archive.js';
export default class ArchiveFile extends File {
private readonly archive: Archive;

public constructor(archive: Archive, fileProps: FileProps) {
super(fileProps);
public constructor(archive: Archive, fileProps: Omit<FileProps, 'filePath'>) {
super({
...fileProps,
filePath: archive.getFilePath(),
});
this.archive = archive;
}

Expand Down
14 changes: 13 additions & 1 deletion src/types/files/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import ROMHeader from './romHeader.js';
export interface FileProps extends ChecksumProps {
readonly filePath: string;
readonly size?: number;
readonly checksumBitmask?: number;
readonly crc32WithoutHeader?: string;
readonly md5WithoutHeader?: string;
readonly sha1WithoutHeader?: string;
Expand All @@ -34,6 +35,8 @@ export default class File implements FileProps {
@Expose()
readonly size: number;

readonly checksumBitmask?: number;

@Expose()
readonly crc32?: string;

Expand Down Expand Up @@ -63,6 +66,7 @@ export default class File implements FileProps {
protected constructor(fileProps: FileProps) {
this.filePath = path.normalize(fileProps.filePath);
this.size = fileProps.size ?? 0;
this.checksumBitmask = fileProps.checksumBitmask;
this.crc32 = fileProps.crc32?.toLowerCase().replace(/^0x/, '').padStart(8, '0');
this.crc32WithoutHeader = fileProps.crc32WithoutHeader?.toLowerCase().replace(/^0x/, '').padStart(8, '0');
this.md5 = fileProps.md5?.toLowerCase().replace(/^0x/, '').padStart(32, '0');
Expand Down Expand Up @@ -147,6 +151,7 @@ export default class File implements FileProps {
return new File({
filePath: fileProps.filePath,
size: finalSize,
checksumBitmask,
crc32: finalCrcWithHeader,
crc32WithoutHeader: finalCrcWithoutHeader,
md5: finalMd5WithHeader,
Expand Down Expand Up @@ -246,7 +251,7 @@ export default class File implements FileProps {
}

public getChecksumBitmask(): number {
return (this.getCrc32()?.replace(/^0+|0+$/, '') ? ChecksumBitmask.CRC32 : 0)
return this.checksumBitmask ?? (this.getCrc32()?.replace(/^0+|0+$/, '') ? ChecksumBitmask.CRC32 : 0)
| (this.getMd5()?.replace(/^0+|0+$/, '') ? ChecksumBitmask.MD5 : 0)
| (this.getSha1()?.replace(/^0+|0+$/, '') ? ChecksumBitmask.SHA1 : 0)
| (this.getSha256()?.replace(/^0+|0+$/, '') ? ChecksumBitmask.SHA256 : 0);
Expand Down Expand Up @@ -413,6 +418,13 @@ export default class File implements FileProps {
return this.downloadToPath(filePath);
}

/**
 * Create a new {@link File} from this file's own properties, with the given properties taking
 * precedence over them. The file path, file header, and patch can't be supplied here.
 */
withProps(props: Omit<FileProps, 'filePath' | 'fileHeader' | 'patch'>): File {
  // Later spreads win, so anything provided in `props` replaces this file's value
  const mergedProps = {
    ...this,
    ...props,
  };
  return new File(mergedProps);
}

withFilePath(filePath: string): File {
return new File({
...this,
Expand Down
3 changes: 3 additions & 0 deletions test/modules/candidateArchiveFileHasher.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Placeholder test case: the body is empty, so it passes without asserting anything.
it('should be tested', () => {
// TODO(cemmer): replace this placeholder with real test cases
});

0 comments on commit e0c40bb

Please sign in to comment.