Skip to content

Commit

Permalink
Feature: allow exact-matching of archives in DATs (#1175)
Browse files Browse the repository at this point in the history
  • Loading branch information
emmercm authored Jun 30, 2024
1 parent 9245a31 commit 2b3111f
Show file tree
Hide file tree
Showing 12 changed files with 179 additions and 41 deletions.
16 changes: 16 additions & 0 deletions docs/input/reading-archives.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,22 @@ Somewhat proprietary archive formats such as `.7z` and `.rar` require `igir` to

This is why `igir` uses `.zip` as its output archive format of choice: `.zip` files are easy and fast to read, even if they can't offer as high a compression ratio as other formats.

## Exact archive matching

Some DAT files, such as the [libretro BIOS System.dat](https://github.com/libretro/libretro-database/blob/master/dat/System.dat), catalog archive files themselves (e.g. zip files) rather than the contents of those archives. By default, `igir` will try to detect DATs like these and calculate checksums for all archive files, in addition to the files they contain.

This adds a potentially non-trivial amount of processing time during ROM scanning, so this behavior can be turned off with the option:

```text
--input-checksum-archives never
```

If for some reason `igir` isn't correctly detecting that a DAT catalogs archive files, this additional processing can be forced with the option:

```text
--input-checksum-archives always
```

## Checksum cache

It can be expensive to calculate checksums of files within archives, especially MD5, SHA1, and SHA256. If `igir` needs to calculate a checksum that is not easily read from the archive (see above), it will cache the result in a file named `igir.cache`. This cached result will then be used as long as the input file's size and modified timestamp remain the same.
Expand Down
35 changes: 31 additions & 4 deletions src/igir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ import DATStatus from './types/datStatus.js';
import File from './types/files/file.js';
import FileCache from './types/files/fileCache.js';
import { ChecksumBitmask } from './types/files/fileChecksums.js';
import FileFactory from './types/files/fileFactory.js';
import IndexedFiles from './types/indexedFiles.js';
import Options from './types/options.js';
import Options, { InputChecksumArchivesMode } from './types/options.js';
import OutputFactory from './types/outputFactory.js';
import Patch from './types/patches/patch.js';
import ReleaseCandidate from './types/releaseCandidate.js';
Expand Down Expand Up @@ -100,7 +101,10 @@ export default class Igir {

// Scan and process input files
let dats = await this.processDATScanner();
const indexedRoms = await this.processROMScanner(this.determineScanningBitmask(dats));
const indexedRoms = await this.processROMScanner(
this.determineScanningBitmask(dats),
this.determineScanningChecksumArchives(dats),
);
const roms = indexedRoms.getFiles();
const patches = await this.processPatchScanner();

Expand Down Expand Up @@ -312,11 +316,34 @@ export default class Igir {
return matchChecksum;
}

private async processROMScanner(checksumBitmask: number): Promise<IndexedFiles> {
/**
 * Decide whether the ROM scanner should also checksum archive files themselves, in addition to
 * the entries they contain.
 *
 * The explicit option modes win: NEVER disables it and ALWAYS enables it. For any other mode,
 * it is enabled only if at least one ROM name in the given DATs has an archive extension.
 */
private determineScanningChecksumArchives(dats: DAT[]): boolean {
  const archivesMode = this.options.getInputChecksumArchives();
  switch (archivesMode) {
    case InputChecksumArchivesMode.NEVER:
      return false;
    case InputChecksumArchivesMode.ALWAYS:
      return true;
    default:
      break;
  }

  // Stops at the first ROM whose name looks like an archive, tracing the DAT that triggered it
  const datCatalogsArchives = (dat: DAT): boolean => {
    const hasArchiveRom = dat.getGames().some(
      (game) => game.getRoms().some(
        (rom) => FileFactory.isExtensionArchive(rom.getName()),
      ),
    );
    if (!hasArchiveRom) {
      return false;
    }
    this.logger.trace(`${dat.getNameShort()}: contains archives, enabling checksum calculation of raw archive contents`);
    return true;
  };

  return dats.some(datCatalogsArchives);
}

private async processROMScanner(
checksumBitmask: number,
checksumArchives: boolean,
): Promise<IndexedFiles> {
const romScannerProgressBarName = 'Scanning for ROMs';
const romProgressBar = await this.logger.addProgressBar(romScannerProgressBarName);

const rawRomFiles = await new ROMScanner(this.options, romProgressBar).scan(checksumBitmask);
const rawRomFiles = await new ROMScanner(this.options, romProgressBar)
.scan(checksumBitmask, checksumArchives);

await romProgressBar.setName('Detecting ROM headers');
const romFilesWithHeaders = await new ROMHeaderProcessor(this.options, romProgressBar)
Expand Down
12 changes: 11 additions & 1 deletion src/modules/argumentsParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import ConsolePoly from '../polyfill/consolePoly.js';
import { ChecksumBitmask } from '../types/files/fileChecksums.js';
import ROMHeader from '../types/files/romHeader.js';
import Internationalization from '../types/internationalization.js';
import Options, { GameSubdirMode, MergeMode } from '../types/options.js';
import Options, { GameSubdirMode, InputChecksumArchivesMode, MergeMode } from '../types/options.js';
import PatchFactory from '../types/patches/patchFactory.js';

/**
Expand Down Expand Up @@ -209,6 +209,16 @@ export default class ArgumentsParser {
requiresArg: true,
default: ChecksumBitmask[ChecksumBitmask.CRC32].toUpperCase(),
})
.option('input-checksum-archives', {
group: groupRomInput,
description: 'Calculate checksums of archive files themselves, allowing them to match files in DATs',
choices: Object.keys(InputChecksumArchivesMode)
.filter((mode) => Number.isNaN(Number(mode)))
.map((mode) => mode.toLowerCase()),
coerce: ArgumentsParser.getLastValue, // don't allow string[] values
requiresArg: true,
default: InputChecksumArchivesMode[InputChecksumArchivesMode.AUTO].toLowerCase(),
})

.option('dat', {
group: groupDatInput,
Expand Down
52 changes: 37 additions & 15 deletions src/modules/movedRomDeleter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ import ProgressBar, { ProgressBarSymbol } from '../console/progressBar.js';
import ArrayPoly from '../polyfill/arrayPoly.js';
import fsPoly from '../polyfill/fsPoly.js';
import DAT from '../types/dats/dat.js';
import Archive from '../types/files/archives/archive.js';
import ArchiveEntry from '../types/files/archives/archiveEntry.js';
import ArchiveFile from '../types/files/archives/archiveFile.js';
import File from '../types/files/file.js';
import Module from './module.js';

Expand Down Expand Up @@ -79,21 +81,41 @@ export default class MovedROMDeleter extends Module {
movedEntries.flatMap((file) => file.hashCode()),
);

const inputEntries = groupedInputRoms.get(filePath) ?? [];

const unmovedEntries = inputEntries.filter((entry) => {
if (entry instanceof ArchiveEntry
&& movedEntries.length === 1
&& !(movedEntries[0] instanceof ArchiveEntry)
&& movedEntries[0].getFilePath() === entry.getFilePath()
) {
// If the input archive entry was written as a raw archive, then consider it moved
return false;
}

// Otherwise, the entry needs to have been explicitly moved
return !movedEntryHashCodes.has(entry.hashCode());
});
const inputFilesForPath = groupedInputRoms.get(filePath) ?? [];
const inputFileIsArchive = inputFilesForPath
.some((inputFile) => inputFile instanceof ArchiveEntry);

const unmovedFiles = inputFilesForPath
.filter((inputFile) => !(inputFile instanceof ArchiveEntry))
// The input archive entry needs to have been explicitly moved
.filter((inputFile) => !movedEntryHashCodes.has(inputFile.hashCode()));

if (inputFileIsArchive && unmovedFiles.length === 0) {
// The input file is an archive, and it was fully extracted OR the archive file itself was
// an exact match and was moved as-is
return filePath;
}

const unmovedArchiveEntries = inputFilesForPath
.filter((
inputFile,
): inputFile is ArchiveEntry<Archive> => inputFile instanceof ArchiveEntry)
.filter((inputEntry) => {
if (movedEntries.length === 1 && movedEntries[0] instanceof ArchiveFile) {
// If the input archive was written as a raw archive, then consider it moved
return false;
}

// Otherwise, the input archive entry needs to have been explicitly moved
return !movedEntryHashCodes.has(inputEntry.hashCode());
});

if (inputFileIsArchive && unmovedArchiveEntries.length === 0) {
// The input file is an archive and it was fully zipped
return filePath;
}

const unmovedEntries = [...unmovedFiles, ...unmovedArchiveEntries];
if (unmovedEntries.length > 0) {
this.progressBar.logWarn(`${filePath}: not deleting moved file, ${unmovedEntries.length.toLocaleString()} archive entr${unmovedEntries.length !== 1 ? 'ies were' : 'y was'} unmatched:\n${unmovedEntries.sort().map((entry) => ` ${entry}`).join('\n')}`);
return undefined;
Expand Down
6 changes: 5 additions & 1 deletion src/modules/romScanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ export default class ROMScanner extends Scanner {
/**
* Scan for ROM files.
*/
async scan(checksumBitmask: number = ChecksumBitmask.CRC32): Promise<File[]> {
async scan(
checksumBitmask: number = ChecksumBitmask.CRC32,
checksumArchives = false,
): Promise<File[]> {
this.progressBar.logTrace('scanning ROM files');
await this.progressBar.setSymbol(ProgressBarSymbol.SEARCHING);
await this.progressBar.reset(0);
Expand All @@ -31,6 +34,7 @@ export default class ROMScanner extends Scanner {
romFilePaths,
this.options.getReaderThreads(),
checksumBitmask,
checksumArchives,
);

this.progressBar.logTrace('done scanning ROM files');
Expand Down
15 changes: 13 additions & 2 deletions src/modules/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import ElasticSemaphore from '../elasticSemaphore.js';
import Defaults from '../globals/defaults.js';
import ArrayPoly from '../polyfill/arrayPoly.js';
import fsPoly from '../polyfill/fsPoly.js';
import ArchiveEntry from '../types/files/archives/archiveEntry.js';
import File from '../types/files/file.js';
import FileFactory from '../types/files/fileFactory.js';
import Options from '../types/options.js';
Expand All @@ -30,6 +31,7 @@ export default abstract class Scanner extends Module {
filePaths: string[],
threads: number,
checksumBitmask: number,
checksumArchives = false,
): Promise<File[]> {
return (await new DriveSemaphore(threads).map(
filePaths,
Expand All @@ -38,7 +40,7 @@ export default abstract class Scanner extends Module {
const waitingMessage = `${inputFile} ...`;
this.progressBar.addWaitingMessage(waitingMessage);

const files = await this.getFilesFromPath(inputFile, checksumBitmask);
const files = await this.getFilesFromPath(inputFile, checksumBitmask, checksumArchives);

this.progressBar.removeWaitingMessage(waitingMessage);
await this.progressBar.incrementDone();
Expand All @@ -60,6 +62,7 @@ export default abstract class Scanner extends Module {
private async getFilesFromPath(
filePath: string,
checksumBitmask: number,
checksumArchives = false,
): Promise<File[]> {
try {
const totalKilobytes = await fsPoly.size(filePath) / 1024;
Expand All @@ -72,7 +75,15 @@ export default abstract class Scanner extends Module {
return [];
}
}
return FileFactory.filesFrom(filePath, checksumBitmask);

const filesFromPath = await FileFactory.filesFrom(filePath, checksumBitmask);

const fileIsArchive = filesFromPath.some((file) => file instanceof ArchiveEntry);
if (checksumArchives && fileIsArchive) {
filesFromPath.push(await FileFactory.fileFrom(filePath, checksumBitmask));
}

return filesFromPath;
},
totalKilobytes,
);
Expand Down
2 changes: 1 addition & 1 deletion src/types/files/archives/zip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export default class Zip extends Archive {
}

static getExtensions(): string[] {
return ['.zip'];
return ['.zip', '.apk', '.ipa', '.jar', '.pk3'];
}

// eslint-disable-next-line class-methods-use-this
Expand Down
22 changes: 22 additions & 0 deletions src/types/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ import DAT from './dats/dat.js';
import File from './files/file.js';
import { ChecksumBitmask } from './files/fileChecksums.js';

/**
 * When to calculate checksums of archive files themselves (as opposed to the files inside them).
 */
export enum InputChecksumArchivesMode {
  /** Never calculate the checksum of archive files */
  NEVER = 1,
  /** Calculate the checksum of archive files if DATs reference archives */
  AUTO = 2,
  /** Always calculate the checksum of archive files */
  ALWAYS = 3,
}

export enum MergeMode {
// Clones contain all parent ROMs, all games contain BIOS & device ROMs
FULLNONMERGED = 1,
Expand Down Expand Up @@ -49,6 +58,7 @@ export interface OptionsProps {
readonly input?: string[],
readonly inputExclude?: string[],
readonly inputMinChecksum?: string,
readonly inputChecksumArchives?: string,

readonly dat?: string[],
readonly datExclude?: string[],
Expand Down Expand Up @@ -166,6 +176,8 @@ export default class Options implements OptionsProps {

readonly inputMinChecksum?: string;

readonly inputChecksumArchives?: string;

readonly dat: string[];

readonly datExclude: string[];
Expand Down Expand Up @@ -355,6 +367,7 @@ export default class Options implements OptionsProps {
this.input = options?.input ?? [];
this.inputExclude = options?.inputExclude ?? [];
this.inputMinChecksum = options?.inputMinChecksum;
this.inputChecksumArchives = options?.inputChecksumArchives;

this.dat = options?.dat ?? [];
this.datExclude = options?.datExclude ?? [];
Expand Down Expand Up @@ -751,6 +764,15 @@ export default class Options implements OptionsProps {
return ChecksumBitmask[checksumBitmask as keyof typeof ChecksumBitmask];
}

/**
 * Parse the raw `inputChecksumArchives` option string into its enum value.
 *
 * Matching is case-insensitive against the enum member names. Returns `undefined` when the
 * option is unset or does not name a mode.
 */
getInputChecksumArchives(): InputChecksumArchivesMode | undefined {
  const requestedMode = this.inputChecksumArchives?.toLowerCase();
  if (requestedMode === undefined) {
    return undefined;
  }

  const checksumMode = Object.keys(InputChecksumArchivesMode)
    // Numeric enums are reverse-mapped, so their keys include "1", "2", "3"; matching one of
    // those would return the member's *name* (a string) instead of its numeric value
    .filter((mode) => Number.isNaN(Number(mode)))
    .find((mode) => mode.toLowerCase() === requestedMode);
  if (!checksumMode) {
    return undefined;
  }
  return InputChecksumArchivesMode[checksumMode as keyof typeof InputChecksumArchivesMode];
}

/**
* Were any DAT paths provided?
*/
Expand Down
3 changes: 2 additions & 1 deletion test/fixtures/dats/one.dat
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
<machine name="Lorem Ipsum">
<description>Lorem Ipsum</description>
<release name="Lorem Ipsum" region="JPN"/>
<rom name="Lorem Ipsum.rom" size="11" crc="70856527" md5="fffcb698d88fbc9425a636ba7e4712a3" sha1="1d913738eb363a4056c19e158aa81189a1eb7a55" status="verified"/>
<!--<rom name="Lorem Ipsum.rom" size="11" crc="70856527" md5="fffcb698d88fbc9425a636ba7e4712a3" sha1="1d913738eb363a4056c19e158aa81189a1eb7a55" status="verified"/>-->
<rom name="Lorem Ipsum.zip" size="203" crc="7ee77289" md5="9d4f876e42a8da0d4ae6f24c665476d9" sha1="25265aea64c1a5809d1b06cb5294a8293fb7027a" status="verified"/>
</machine>
<machine name="One Three">
<description>One Three</description>
Expand Down
Loading

0 comments on commit 2b3111f

Please sign in to comment.