Skip to content

Commit

Permalink
fix(kernel): package cache fails under parallelism (#4215)
Browse files Browse the repository at this point in the history
The package cache mechanism that was turned on by default in #4181 works in theory under parallelism, but not in practice.

Typically the CDK CLI will prevent CDK apps from running in parallel, but Python testing frameworks like `tox` use subprocess parallelism to speed up test runs, leading to the jsii imports being executed at the same time.

Since jsii is sync, the locking needs to be sync. The sync locking feature of the `lockfile` library doesn't have wait support (for good reason), and so when a lock is already acquired by another process it quickly burns through its 12 retries in a hot loop, and then exits with an error.

Two changes to address this:

- (Ab)use `Atomics.wait()` to get a synchronous sleeping primitive; since `lockSync` doesn't support synchronous sleep, we build our own retry loop with synchronous sleep around `lockSync`.
- Since the extracted directory is immutable: if the marker file in the extracted directory exists, we can treat it as evidence that the directory has been completely written and we can skip trying to vy for exclusive access to write it. This avoids all lock contention after the very first CDK app execution.

Fixes #4207.

---

By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license].

[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0
  • Loading branch information
rix0rrr authored Aug 10, 2023
1 parent d005937 commit b739ef6
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 34 deletions.
134 changes: 122 additions & 12 deletions packages/@jsii/kernel/src/disk-cache/disk-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {
utimesSync,
writeFileSync,
} from 'fs';
import { lockSync, unlockSync } from 'lockfile';
import { lockSync, Options, unlockSync } from 'lockfile';
import { dirname, join } from 'path';

import { digestFile } from './digest-file';
Expand Down Expand Up @@ -152,9 +152,62 @@ export class Entry {
return join(this.path, MARKER_FILE_NAME);
}

/**
* Whether the directory has been completely written
*
* The presence of the marker file is a signal that we can skip trying to lock the directory.
*/
public get isComplete(): boolean {
return existsSync(this.#markerFile);
}

/**
* Retrieve an entry from the cache
*
* If the entry doesn't exist yet, use 'cb' to produce the file contents.
*/
public retrieve(cb: (path: string) => void): {
path: string;
cache: 'hit' | 'miss';
} {
// If the marker file already exists, update its timestamp and immediately return.
// We don't even try to lock.
if (this.isComplete) {
this.#touchMarkerFile();
return { path: this.path, cache: 'hit' };
}

let cache: 'hit' | 'miss' = 'miss';
this.lock((lock) => {
// While we all fought to acquire the lock, someone else might have completed already.
if (this.isComplete) {
cache = 'hit';
return;
}

// !!!IMPORTANT!!!
// Extract directly into the final target directory, as certain antivirus
// software configurations on Windows will make a `renameSync` operation
// fail with EPERM until the files have been fully analyzed.
mkdirSync(this.path, { recursive: true });
try {
cb(this.path);
} catch (error) {
rmSync(this.path, { force: true, recursive: true });
throw error;
}
lock.markComplete();
});
return { path: this.path, cache };
}

public lock<T>(cb: (entry: LockedEntry) => T): T {
mkdirSync(dirname(this.path), { recursive: true });
lockSync(this.#lockFile, { retries: 12, stale: 5_000 });
lockSyncWithWait(this.#lockFile, {
retries: 12,
// Extracting the largest tarball takes ~5s
stale: 10_000,
});
let disposed = false;
try {
return cb({
Expand All @@ -179,20 +232,13 @@ export class Entry {
mkdirSync(dirname(join(this.path, name)), { recursive: true });
writeFileSync(join(this.path, name), content);
},
touch: () => {
markComplete: () => {
if (disposed) {
throw new Error(
`Cannot touch ${this.path} once the lock block was returned!`,
);
}
if (this.pathExists) {
if (existsSync(this.#markerFile)) {
const now = new Date();
utimesSync(this.#markerFile, now, now);
} else {
writeFileSync(this.#markerFile, '');
}
}
this.#touchMarkerFile();
},
});
} finally {
Expand All @@ -201,6 +247,23 @@ export class Entry {
}
}

/**
* Update the timestamp on the marker file
*/
#touchMarkerFile() {
if (this.pathExists) {
try {
const now = new Date();
utimesSync(this.#markerFile, now, now);
} catch (e: any) {
if (e.code !== 'ENOENT') {
throw e;
}
writeFileSync(this.#markerFile, '');
}
}
}

public read(file: string): Buffer | undefined {
try {
return readFileSync(join(this.path, file));
Expand All @@ -217,7 +280,12 @@ export interface LockedEntry {
delete(): void;
write(name: string, data: Buffer): void;

touch(): void;
/**
* Mark the entry has having been completed
*
* The modification time of this file is used for cleanup.
*/
markComplete(): void;
}

function* directoriesUnder(
Expand All @@ -242,3 +310,45 @@ function* directoriesUnder(
}
}
}

/**
* We must use 'lockSync', but that doesn't support waiting (because waiting is only supported for async APIs)
* so we have to build our own looping locker with waits
*/
function lockSyncWithWait(path: string, options: Options) {
let retries = options.retries ?? 0;
let sleep = 100;

// eslint-disable-next-line no-constant-condition
while (true) {
try {
lockSync(path, {
retries: 0,
stale: options.stale,
});
return;
} catch (e: any) {
if (retries === 0) {
throw e;
}
retries--;

if (e.code === 'EEXIST') {
// Most common case, needs longest sleep. Randomize the herd.
sleepSync(Math.floor(Math.random() * sleep));
sleep *= 1.5;
} else {
sleepSync(5);
}
}
}
}

/**
* Abuse Atomics.wait() to come up with a sync sleep
*
* We must use a sync sleep because all of jsii is sync.
*/
function sleepSync(ms: number) {
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
}
34 changes: 12 additions & 22 deletions packages/@jsii/kernel/src/tar-cache/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ export function extract(
}
}

/**
* Extract the tarball into a cached directory, symlink that directory into the target location
*/
function extractViaCache(
file: string,
outDir: string,
Expand All @@ -66,35 +69,22 @@ function extractViaCache(
const dirCache = DiskCache.inDirectory(cacheRoot);

const entry = dirCache.entryFor(file, ...comments);
const { path, cache } = entry.lock((lock) => {
let cache: 'hit' | 'miss' = 'hit';
if (!entry.pathExists) {
// !!!IMPORTANT!!!
// Extract directly into the final target directory, as certain antivirus
// software configurations on Windows will make a `renameSync` operation
// fail with EPERM until the files have been fully analyzed.
mkdirSync(entry.path, { recursive: true });
try {
untarInto({
...options,
cwd: entry.path,
file,
});
} catch (error) {
rmSync(entry.path, { force: true, recursive: true });
throw error;
}
cache = 'miss';
}
lock.touch();
return { path: entry.path, cache };
const { path, cache } = entry.retrieve((path) => {
untarInto({
...options,
cwd: path,
file,
});
});

link(path, outDir);

return { cache };
}

/**
* Extract directory into the target location
*/
function extractToOutDir(
file: string,
cwd: string,
Expand Down

0 comments on commit b739ef6

Please sign in to comment.