Skip to content

Commit

Permalink
Feature: de-duplicate scanned files before processing (#125)
Browse files Browse the repository at this point in the history
  • Loading branch information
emmercm committed Oct 15, 2022
1 parent 56c5e75 commit 29967a7
Show file tree
Hide file tree
Showing 12 changed files with 110 additions and 87 deletions.
2 changes: 1 addition & 1 deletion .idea/runConfigurations/igir.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 10 additions & 6 deletions src/console/singleBarFormatted.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export default class SingleBarFormatted {
/**
 * Restrict a value to a range.
 *
 * The lower bound is applied first and the upper bound last, so when the bounds are
 * inverted (`min > max`) the result is `max`.
 *
 * @param val the value to restrict
 * @param min the lower bound
 * @param max the upper bound
 * @returns `val` limited to the range `[min, max]`
 */
function clamp(val: number, min: number, max: number): number {
  const atLeastMin = Math.max(val, min);
  return Math.min(atLeastMin, max);
}
const MAX_BUFFER_SIZE = clamp(Math.floor(params.total / 10), 25, 100);
const MAX_BUFFER_SIZE = clamp(Math.floor(params.total / 10), 25, 50);

this.valueTimeBuffer = [
...this.valueTimeBuffer.slice(1 - MAX_BUFFER_SIZE),
Expand All @@ -79,14 +79,14 @@ export default class SingleBarFormatted {

const doneTime = linearRegressionLine(linearRegression(this.valueTimeBuffer))(params.total);
if (Number.isNaN(doneTime)) {
// Vertical line, we got the same value at 2+ different times
return 0;
// Vertical line
return -1;
}
const remaining = (doneTime - Date.now()) / 1000;
if (!Number.isFinite(remaining) || remaining < 0) {
return 0;
if (!Number.isFinite(remaining)) {
return -1;
}
return remaining;
return Math.max(remaining, 0);
}

private static getBar(options: Options, params: Params): string {
Expand All @@ -99,6 +99,10 @@ export default class SingleBarFormatted {
}

private static getEtaFormatted(eta: number): string {
if (eta < 0) {
return 'infinity';
}

const etaInteger = Math.ceil(eta);
const secondsRounded = 5 * Math.round(etaInteger / 5);
if (secondsRounded >= 3600) {
Expand Down
8 changes: 4 additions & 4 deletions src/igir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,22 +101,22 @@ export default class Igir {
ProgressBarCLI.stop();
throw new Error('No valid DAT files found!');
}
await progressBar.doneItems(dats.length, 'DAT', 'found');
await progressBar.doneItems(dats.length, 'unique DAT', 'found');
return dats;
}

private async processROMScanner(): Promise<File[]> {
const progressBar = this.logger.addProgressBar('Scanning for ROMs', Symbols.WAITING);
const romInputs = await new ROMScanner(this.options, progressBar).scan();
await progressBar.doneItems(romInputs.length, 'file', 'found');
await progressBar.doneItems(romInputs.length, 'unique ROM', 'found');
return romInputs;
}

private async processHeaderProcessor(romFiles: File[]): Promise<File[]> {
const headerProcessorProgressBar = this.logger.addProgressBar('Reading ROM headers', Symbols.WAITING);
const headerProcessorProgressBar = this.logger.addProgressBar('Detecting ROM headers', Symbols.WAITING);
const processedRomFiles = await new HeaderProcessor(this.options, headerProcessorProgressBar)
.process(romFiles);
await headerProcessorProgressBar.doneItems(processedRomFiles.length, 'file', 'read');
await headerProcessorProgressBar.doneItems(processedRomFiles.length, 'ROM', 'processed');
return processedRomFiles;
}

Expand Down
19 changes: 3 additions & 16 deletions src/modules/candidateGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,12 @@ export default class CandidateGenerator {
}

private static indexFilesByHashCode(files: File[]): Map<string, File> {
return files.reduce((acc, file) => {
file.hashCodes().forEach((hashCode) => this.addToIndex(acc, hashCode, file));
return acc;
return files.reduce((map, file) => {
file.hashCodes().forEach((hashCode) => map.set(hashCode, file));
return map;
}, new Map<string, File>());
}

/**
 * Record `file` under `hash` in the index.
 *
 * An entry that already exists is only overwritten when it is an {@link ArchiveEntry} and the
 * incoming file is not, i.e. non-archived files are preferred over archived ones.
 *
 * @param map the hash code index, mutated in place
 * @param hash the hash code to index the file under
 * @param file the file to store
 */
private static addToIndex(map: Map<string, File>, hash: string, file: File): void {
  // Haven't seen file yet, store it
  if (!map.has(hash)) {
    map.set(hash, file);
    return;
  }

  // Have already seen file, prefer non-archived files
  const existing = map.get(hash);
  const replacesArchivedCopy = existing instanceof ArchiveEntry
    && !(file instanceof ArchiveEntry);
  if (replacesArchivedCopy) {
    map.set(hash, file);
  }
}

private async buildReleaseCandidateForRelease(
dat: DAT,
game: Game,
Expand Down
17 changes: 2 additions & 15 deletions src/modules/datScanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ export default class DATScanner extends Scanner {
await this.progressBar.setSymbol(Symbols.SEARCHING);
await this.progressBar.reset(datFilePaths.length);

const datFiles = await this.getDatFiles(datFilePaths);
await this.progressBar.logDebug('Enumerating DAT archives');
const datFiles = await this.getFilesFromPaths(datFilePaths, Constants.DAT_SCANNER_THREADS);
await this.progressBar.reset(datFiles.length);

await this.progressBar.logInfo('Deserializing DAT XML to objects');
Expand All @@ -38,20 +39,6 @@ export default class DATScanner extends Scanner {
return dats;
}

// Scan files on disk for DATs (archives may yield more than one DAT).
// Paths are processed with at most Constants.DAT_SCANNER_THREADS in flight, and the per-path
// results are flattened into a single list.
private async getDatFiles(datFilePaths: string[]): Promise<File[]> {
  await this.progressBar.logDebug('Enumerating DAT archives');

  const filesPerPath = await async.mapLimit(
    datFilePaths,
    Constants.DAT_SCANNER_THREADS,
    async (datFilePath: string, callback: AsyncResultCallback<File[], Error>) => {
      await this.progressBar.logDebug(`${datFilePath}: Reading file`);
      callback(null, await this.getFilesFromPath(datFilePath));
    },
  );

  return filesPerPath.flatMap((files) => files);
}

// Parse each file into a DAT
private async parseDatFiles(datFiles: File[]): Promise<DAT[]> {
await this.progressBar.logDebug('Parsing DAT files');
Expand Down
16 changes: 1 addition & 15 deletions src/modules/romScanner.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import async, { AsyncResultCallback } from 'async';

import { Symbols } from '../console/progressBar.js';
import Constants from '../constants.js';
import File from '../types/files/file.js';
Expand All @@ -22,18 +20,6 @@ export default class ROMScanner extends Scanner {
await this.progressBar.reset(romFilePaths.length);
await this.progressBar.logInfo(`Found ${romFilePaths.length} ROM file${romFilePaths.length !== 1 ? 's' : ''}`);

return (await async.mapLimit(
romFilePaths,
Constants.ROM_SCANNER_THREADS,
async (inputFile, callback: AsyncResultCallback<File[], Error>) => {
await this.progressBar.increment();

const files = await this.getFilesFromPath(inputFile);

callback(null, files);
},
))
.flatMap((files) => files)
.filter((file, idx, files) => files.indexOf(file) === idx);
return this.getFilesFromPaths(romFilePaths, Constants.ROM_SCANNER_THREADS);
}
}
28 changes: 9 additions & 19 deletions src/modules/romWriter.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import async, { AsyncResultCallback } from 'async';
import { Semaphore } from 'async-mutex';
import fs, { promises as fsPromises } from 'fs';
import path from 'path';
Expand Down Expand Up @@ -51,25 +50,16 @@ export default class ROMWriter {
await this.progressBar.setSymbol(Symbols.WRITING);
await this.progressBar.reset(parentsToCandidates.size);

await async.each(
[...parentsToCandidates.entries()],
async (
[, releaseCandidates],
callback: AsyncResultCallback<undefined, Error>,
) => {
await ROMWriter.semaphore.runExclusive(async () => {
await this.progressBar.increment();

/* eslint-disable no-await-in-loop */
for (let j = 0; j < releaseCandidates.length; j += 1) {
const releaseCandidate = releaseCandidates[j];
await this.writeReleaseCandidate(dat, releaseCandidate);
}
await Promise.all([...parentsToCandidates.entries()]
.map(async ([, releaseCandidates]) => ROMWriter.semaphore.runExclusive(async () => {
await this.progressBar.increment();

callback();
});
},
);
/* eslint-disable no-await-in-loop */
for (let j = 0; j < releaseCandidates.length; j += 1) {
const releaseCandidate = releaseCandidates[j];
await this.writeReleaseCandidate(dat, releaseCandidate);
}
})));

await this.progressBar.setSymbol(Symbols.WRITING);
await this.deleteMovedFiles();
Expand Down
68 changes: 67 additions & 1 deletion src/modules/scanner.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import async, { AsyncResultCallback } from 'async';
import path from 'path';

import ProgressBar from '../console/progressBar.js';
import ArchiveFactory from '../types/archives/archiveFactory.js';
import Rar from '../types/archives/rar.js';
import SevenZip from '../types/archives/sevenZip.js';
import Tar from '../types/archives/tar.js';
import Zip from '../types/archives/zip.js';
import ArchiveEntry from '../types/files/archiveEntry.js';
import File from '../types/files/file.js';
import Options from '../types/options.js';

Expand All @@ -13,7 +21,27 @@ export default abstract class Scanner {
this.progressBar = progressBar;
}

protected async getFilesFromPath(filePath: string): Promise<File[]> {
/**
 * Expand every path into the files it contains and return them de-duplicated by hash codes.
 *
 * Paths are processed with at most `threads` in flight, and the progress bar is incremented
 * once per path. The combined list is sorted with {@link fileComparator} before filtering, so
 * of several files with matching hash codes the one that sorts first is the one that is kept.
 *
 * @param filePaths the paths to scan
 * @param threads the maximum number of paths to process concurrently
 * @returns the unique files, in {@link fileComparator} order
 */
protected async getFilesFromPaths(filePaths: string[], threads: number): Promise<File[]> {
  const filesPerPath = await async.mapLimit(
    filePaths,
    threads,
    async (inputFile, callback: AsyncResultCallback<File[], Error>) => {
      let files: File[];
      try {
        await this.progressBar.increment();
        files = await this.getFilesFromPath(inputFile);
      } catch (e) {
        // Report the failure through the callback, otherwise it would never be invoked for
        // this item and the error would be lost as an unhandled rejection
        callback(e instanceof Error ? e : new Error(String(e)));
        return;
      }
      callback(null, files);
    },
  );

  // Keys of the complete hash code lists of every file visited so far, kept or not
  const seenHashCodeKeys = new Set<string>();

  return filesPerPath
    .flatMap((files) => files)
    // Limit to unique files
    .sort(this.fileComparator.bind(this))
    .filter((file) => {
      const hashCodes = file.hashCodes();

      // A file is a duplicate when an earlier file's hash code list is a leading slice of this
      // file's list (including the empty list). Looking up each leading slice in a set gives
      // the same result as comparing against every earlier file, in a single pass.
      // NOTE(review): the leading-slice match is carried over from the previous pairwise
      // comparison - confirm whether full equality was intended.
      let isDuplicate = false;
      for (let length = 0; length <= hashCodes.length && !isDuplicate; length += 1) {
        isDuplicate = seenHashCodeKeys.has(JSON.stringify(hashCodes.slice(0, length)));
      }

      seenHashCodeKeys.add(JSON.stringify(hashCodes));
      return !isDuplicate;
    });
}

private async getFilesFromPath(filePath: string): Promise<File[]> {
let files: File[];
if (ArchiveFactory.isArchive(filePath)) {
try {
Expand All @@ -30,4 +58,42 @@ export default abstract class Scanner {
}
return files;
}

/**
 * Comparator that orders files from most to least preferred.
 *
 * Preference, in order:
 *  1. files located inside the configured output directory
 *  2. files with a lower {@link Scanner.archiveEntryPriority}
 *  3. file path, compared with `localeCompare`
 *
 * @param one the first file
 * @param two the second file
 * @returns a negative number if `one` sorts first, positive if `two` does, otherwise 0
 */
private fileComparator(one: File, two: File): number {
  // Prefer files that are already in the output directory
  const output = path.resolve(this.options.getOutput());
  // Match on "<output><separator>" rather than the bare path, so that a sibling directory that
  // merely shares a name prefix (e.g. "out-backup" next to "out") is not treated as inside it
  const outputPrefix = output.endsWith(path.sep) ? output : `${output}${path.sep}`;
  const outputRank = (file: File): number => {
    const resolved = path.resolve(file.getFilePath());
    return resolved === output || resolved.startsWith(outputPrefix) ? 0 : 1;
  };
  const outputSort = outputRank(one) - outputRank(two);
  if (outputSort !== 0) {
    return outputSort;
  }

  // Otherwise, prefer non-archives or more efficient archives
  const archiveEntrySort = Scanner.archiveEntryPriority(one)
    - Scanner.archiveEntryPriority(two);
  if (archiveEntrySort !== 0) {
    return archiveEntrySort;
  }

  // Otherwise, we don't particularly care
  return one.getFilePath().localeCompare(two.getFilePath());
}

/**
 * Rank a file by how it is stored; a lower number is sorted first.
 *
 * Files that are not archive entries rank 0, entries of known archive types rank 1-4 in the
 * order listed below, and entries of any other archive type rank 99.
 *
 * This ordering should match {@link ArchiveFactory#archiveFrom}
 */
private static archiveEntryPriority(file: File): number {
  if (!(file instanceof ArchiveEntry)) {
    return 0;
  }

  // Checked in order, the first matching type decides the rank
  const rankedArchiveTypes = [Zip, Tar, Rar, SevenZip];
  const archive = file.getArchive();
  const position = rankedArchiveTypes
    .findIndex((archiveType) => archive instanceof archiveType);
  return position === -1 ? 99 : position + 1;
}
}
3 changes: 3 additions & 0 deletions src/types/archives/archiveFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import Tar from './tar.js';
import Zip from './zip.js';

export default class ArchiveFactory {
/**
* This ordering should match {@link Scanner#archiveEntryPriority}
*/
static archiveFrom(filePath: string): Archive {
if (Zip.SUPPORTED_EXTENSIONS.some((ext) => filePath.toLowerCase().endsWith(ext))) {
return new Zip(filePath);
Expand Down
8 changes: 4 additions & 4 deletions test/modules/romScanner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ it('should not throw on bad archives', async () => {

describe('multiple files', () => {
it('no files are excluded', async () => {
const expectedRomFiles = 48;
const expectedRomFiles = 15;
await expect(createRomScanner(['test/fixtures/roms']).scan()).resolves.toHaveLength(expectedRomFiles);
await expect(createRomScanner(['test/fixtures/roms/*', 'test/fixtures/roms/**/*']).scan()).resolves.toHaveLength(expectedRomFiles);
await expect(createRomScanner(['test/fixtures/roms/**/*']).scan()).resolves.toHaveLength(expectedRomFiles);
await expect(createRomScanner(['test/fixtures/roms/**/*', 'test/fixtures/roms/**/*.{rom,zip}']).scan()).resolves.toHaveLength(expectedRomFiles);
});

it('some files are excluded', async () => {
await expect(createRomScanner(['test/fixtures/roms/**/*'], ['test/fixtures/roms/**/*.rom']).scan()).resolves.toHaveLength(41);
await expect(createRomScanner(['test/fixtures/roms/**/*'], ['test/fixtures/roms/**/*.rom', 'test/fixtures/roms/**/*.rom']).scan()).resolves.toHaveLength(41);
await expect(createRomScanner(['test/fixtures/roms/**/*'], ['test/fixtures/roms/**/*.rom', 'test/fixtures/roms/**/*.zip']).scan()).resolves.toHaveLength(32);
await expect(createRomScanner(['test/fixtures/roms/**/*'], ['test/fixtures/roms/**/*.rom']).scan()).resolves.toHaveLength(14);
await expect(createRomScanner(['test/fixtures/roms/**/*'], ['test/fixtures/roms/**/*.rom', 'test/fixtures/roms/**/*.rom']).scan()).resolves.toHaveLength(14);
await expect(createRomScanner(['test/fixtures/roms/**/*'], ['test/fixtures/roms/**/*.rom', 'test/fixtures/roms/**/*.zip']).scan()).resolves.toHaveLength(13);
});

it('all files are excluded', async () => {
Expand Down
8 changes: 4 additions & 4 deletions test/modules/romWriter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ describe('zip', () => {
test.each([
[
'**/!(*headered)/*',
['empty.zip', 'fizzbuzz.zip', 'foobar.zip', 'loremipsum.zip', 'one.zip', 'onetwothree.zip', 'three.zip', 'two.zip', 'unknown.zip'],
['empty.zip', 'fizzbuzz.zip', 'foobar.zip', 'loremipsum.zip', 'one.zip', 'three.zip', 'two.zip', 'unknown.zip'],
],
[
'7z/*',
Expand Down Expand Up @@ -398,7 +398,7 @@ describe('zip', () => {
test.each([
[
'**/!(*headered)/*',
['empty.zip', 'fizzbuzz.zip', 'foobar.zip', 'loremipsum.zip', 'one.zip', 'onetwothree.zip', 'three.zip', 'two.zip', 'unknown.zip'],
['empty.zip', 'fizzbuzz.zip', 'foobar.zip', 'loremipsum.zip', 'one.zip', 'three.zip', 'two.zip', 'unknown.zip'],
['raw/empty.rom', 'raw/fizzbuzz.nes', 'raw/foobar.lnx', 'raw/loremipsum.rom', 'raw/one.rom', 'raw/three.rom', 'raw/two.rom', 'raw/unknown.rom'],
],
[
Expand Down Expand Up @@ -601,7 +601,7 @@ describe('raw', () => {
test.each([
[
'**/!(*headered)/*',
['empty.rom', 'fizzbuzz.nes', 'foobar.lnx', 'loremipsum.rom', 'one.rom', path.join('onetwothree', 'one.rom'), path.join('onetwothree', 'three.rom'), path.join('onetwothree', 'two.rom'), 'three.rom', 'two.rom', 'unknown.rom'],
['empty.rom', 'fizzbuzz.nes', 'foobar.lnx', 'loremipsum.rom', 'one.rom', 'three.rom', 'two.rom', 'unknown.rom'],
],
[
'7z/*',
Expand Down Expand Up @@ -645,7 +645,7 @@ describe('raw', () => {
test.each([
[
'**/!(*headered)/*',
['empty.rom', 'fizzbuzz.nes', 'foobar.lnx', 'loremipsum.rom', 'one.rom', path.join('onetwothree', 'one.rom'), path.join('onetwothree', 'three.rom'), path.join('onetwothree', 'two.rom'), 'three.rom', 'two.rom', 'unknown.rom'],
['empty.rom', 'fizzbuzz.nes', 'foobar.lnx', 'loremipsum.rom', 'one.rom', 'three.rom', 'two.rom', 'unknown.rom'],
['raw/empty.rom', 'raw/fizzbuzz.nes', 'raw/foobar.lnx', 'raw/loremipsum.rom', 'raw/one.rom', 'raw/three.rom', 'raw/two.rom', 'raw/unknown.rom'],
],
[
Expand Down
4 changes: 2 additions & 2 deletions test/types/files/archiveEntry.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ describe('extractToFile', () => {
'./test/fixtures/roms/7z',
],
}), new ProgressBarFake()).scan();
expect(archiveEntries).toHaveLength(21);
expect(archiveEntries).toHaveLength(7);

const temp = fsPoly.mkdtempSync(Constants.GLOBAL_TEMP_DIR);
/* eslint-disable no-await-in-loop */
Expand All @@ -156,7 +156,7 @@ describe('extractToStream', () => {
'./test/fixtures/roms/7z',
],
}), new ProgressBarFake()).scan();
expect(archiveEntries).toHaveLength(21);
expect(archiveEntries).toHaveLength(7);

const temp = fsPoly.mkdtempSync(Constants.GLOBAL_TEMP_DIR);
/* eslint-disable no-await-in-loop */
Expand Down

0 comments on commit 29967a7

Please sign in to comment.