Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: write snapshot if try runtime check fails #4657

Merged
merged 2 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 26 additions & 9 deletions .github/workflows/upgrade-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,30 @@ on:
upgrade-from-release:
description: 'The release we want to upgrade *from*: "perseverance" or "berghain"'
required: true
default: 'perseverance'
default: "perseverance"
upgrade-to-workflow-name:
description: 'Name of the workflow to pull the upgrade-to artefacts from'
description: "Name of the workflow to pull the upgrade-to artefacts from"
required: true
default: 'ci-main.yml'
default: "ci-main.yml"
upgrade-to-commit:
description: 'Commit to run the upgrade test against. Leave blank to use latest successful workflow run.'
description: "Commit to run the upgrade test against. Leave blank to use the latest workflow run."
required: false

workflow_call:
inputs:
upgrade-from-release:
type: string
description: 'The release we want to upgrade *from*: "perseverance" or "berghain"'
default: 'perseverance'
default: "perseverance"
upgrade-to-workflow-name:
type: string
description: 'Name of the workflow to pull the upgrade-to artefacts from'
default: 'ci-main.yml'
description: "Name of the workflow to pull the upgrade-to artefacts from"
default: "ci-main.yml"
upgrade-to-commit:
type: string
description: 'Commit to run the upgrade test against. Leave blank to use latest successful workflow run.'
description: "Commit to run the upgrade test against. Leave blank to use the latest successful workflow run."
required: false


env:
FORCE_COLOR: 1

Expand Down Expand Up @@ -247,6 +246,24 @@ jobs:
path: |
/tmp/chainflip/*/chainflip-*.*log

- name: Upload Chainflip Logs 💾
if: always()
continue-on-error: true
uses: actions/upload-artifact@v3
with:
name: chainflip-logs
path: |
/tmp/chainflip/logs/*.log
kylezs marked this conversation as resolved.
Show resolved Hide resolved

- name: Upload Snapshots 💾
if: failure()
continue-on-error: true
uses: actions/upload-artifact@v3
with:
name: failure-snapshots
path: |
/tmp/chainflip/snapshots/*.snap
kylezs marked this conversation as resolved.
Show resolved Hide resolved

- name: Clean Up docker containers 🧹
if: always()
continue-on-error: true
Expand Down
6 changes: 4 additions & 2 deletions bouncer/commands/upgrade_network.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ async function main(): Promise<void> {
}

runWithTimeout(main(), 15 * 60 * 1000).catch((error) => {
console.error(error);
process.exit(-1);
console.error('upgrade_network exiting due to error: ', error);
if (process.exitCode === 0) {
process.exitCode = -1;
kylezs marked this conversation as resolved.
Show resolved Hide resolved
}
});
70 changes: 45 additions & 25 deletions bouncer/shared/try_runtime_upgrade.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,53 @@
// https://github.com/paritytech/try-runtime-cli

import { ApiPromise } from '@polkadot/api';
import { execSync } from 'child_process';
import path from 'path';
import os from 'os';
import fs from 'fs';
import { compileBinaries } from './utils/compile_binaries';
import { createTmpDirIfNotExists, execWithRustLog } from './utils/exec_with_log';

function tryRuntimeCommand(runtimePath: string, blockParam: string, networkUrl: string) {
// Create a temporary file for capturing stderr
const stderrFile = path.join(os.tmpdir(), `cmd-stderr-${Date.now()}`);
try {
execSync(
// TODO: Replace pre-and-post with all after the SDK issue paritytech/polkadot-sdk#2560 is merged.
`try-runtime --runtime ${runtimePath} on-runtime-upgrade --disable-spec-version-check --disable-idempotency-checks --checks pre-and-post ${blockParam} --uri ${networkUrl} 2> ${stderrFile}`,
{ env: { ...process.env, RUST_LOG: 'runtime::executive=debug' } },
);
console.log(`try-runtime success for blockParam ${blockParam}`);
} catch (e) {
console.error(`try-runtime failed for blockParam ${blockParam}`);
const stderrOutput = fs.readFileSync(stderrFile, 'utf8');
console.error(e);
console.error('Command failed: Command output:', stderrOutput);
function createSnapshotFile(networkUrl: string, blockHash: string) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is new.

const blockParam = blockHash === 'latest' ? '' : `--at ${blockHash}`;
const snapshotFolder = createTmpDirIfNotExists('chainflip/snapshots/');
const snapshotOutputPath = path.join(snapshotFolder, `snapshot-at-${blockHash}.snap`);

fs.unlinkSync(stderrFile);
console.log('Writing snapshot to: ', snapshotOutputPath);

process.exit(-1);
execWithRustLog(
`try-runtime create-snapshot ${blockParam} --uri ${networkUrl} ${snapshotOutputPath}`,
`create-snapshot-${blockHash}`,
'runtime::executive=debug',
(success) => {
if (!success) {
console.error('Failed to create snapshot.');
}
process.exitCode = 1;
},
);
}

function tryRuntimeCommand(runtimePath: string, blockHash: 'latest' | string, networkUrl: string) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just refactored to use execWithRustLog and write a snapshot if it fails.

const blockParam = blockHash === 'latest' ? 'live' : `live --at ${blockHash}`;

if (process.exitCode === 1) {
console.error('TryRuntime error detected. Exiting... CHECK THE NODE LOGS FOR MORE INFO');
throw new Error('TryRuntime error detected.');
}

execWithRustLog(
`try-runtime \
--runtime ${runtimePath} on-runtime-upgrade \
--disable-spec-version-check \
--disable-idempotency-checks \
--checks pre-and-post ${blockParam} \
--uri ${networkUrl}`,
`try-runtime-${blockHash}`,
'runtime::executive=debug',
(success) => {
if (!success) {
createSnapshotFile(networkUrl, blockHash);
}
},
);
}

// 4 options:
Expand All @@ -51,8 +72,7 @@ export async function tryRuntimeUpgrade(
let blockHash = await api.rpc.chain.getBlockHash(blockNumber);
while (!blockHash.eq(latestBlock)) {
blockHash = await api.rpc.chain.getBlockHash(blockNumber);
tryRuntimeCommand(runtimePath, `live --at ${blockHash}`, networkUrl);

tryRuntimeCommand(runtimePath, `${blockHash}`, networkUrl);
kylezs marked this conversation as resolved.
Show resolved Hide resolved
blockNumber++;
}
console.log(`Block ${latestBlock} has been reached, exiting.`);
Expand All @@ -63,17 +83,17 @@ export async function tryRuntimeUpgrade(
let nextHash = await api.rpc.chain.getBlockHash();

while (blocksProcessed < lastN) {
tryRuntimeCommand(runtimePath, `live --at ${nextHash}`, networkUrl);
tryRuntimeCommand(runtimePath, `${nextHash}`, networkUrl);

const currentBlockHeader = await api.rpc.chain.getHeader(nextHash);
nextHash = currentBlockHeader.parentHash;
blocksProcessed++;
}
} else if (block === 'latest') {
tryRuntimeCommand(runtimePath, 'live', networkUrl);
tryRuntimeCommand(runtimePath, 'latest', networkUrl);
} else {
const blockHash = await api.rpc.chain.getBlockHash(block);
tryRuntimeCommand(runtimePath, `live --at ${blockHash}`, networkUrl);
tryRuntimeCommand(runtimePath, `${blockHash}`, networkUrl);
}

console.log('try-runtime upgrade successful.');
Expand Down
80 changes: 48 additions & 32 deletions bouncer/shared/upgrade_network.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { compareSemVer, sleep } from './utils';
import { bumpSpecVersionAgainstNetwork } from './utils/spec_version';
import { compileBinaries } from './utils/compile_binaries';
import { submitRuntimeUpgradeWithRestrictions } from './submit_runtime_upgrade';
import { execWithLog } from './utils/exec_with_log';

async function readPackageTomlVersion(projectRoot: string): Promise<string> {
const data = await fs.readFile(path.join(projectRoot, '/state-chain/runtime/Cargo.toml'), 'utf8');
Expand Down Expand Up @@ -49,23 +50,19 @@ async function incompatibleUpgradeNoBuild(
runtimePath: string,
numberOfNodes: 1 | 3,
) {
let selectedNodes;
if (numberOfNodes === 1) {
selectedNodes = ['bashful'];
} else if (numberOfNodes === 3) {
selectedNodes = ['bashful', 'doc', 'dopey'];
} else {
throw new Error('Invalid number of nodes');
}
const SELECTED_NODES = numberOfNodes === 1 ? 'bashful' : 'bashful doc dopey';

console.log('Starting all the engines');

const nodeCount = numberOfNodes + '-node';
execSync(
`INIT_RUN=false LOG_SUFFIX="-upgrade" NODE_COUNT=${nodeCount} SELECTED_NODES="${selectedNodes.join(
' ',
)}" LOCALNET_INIT_DIR=${localnetInitPath} BINARY_ROOT_PATH=${binaryPath} ${localnetInitPath}/scripts/start-all-engines.sh`,
);
execWithLog(`${localnetInitPath}/scripts/start-all-engines.sh`, 'start-all-engines-pre-upgrade', {
INIT_RUN: 'false',
LOG_SUFFIX: '-upgrade',
NODE_COUNT: nodeCount,
SELECTED_NODES,
LOCALNET_INIT_DIR: localnetInitPath,
BINARY_ROOT_PATH: binaryPath,
kylezs marked this conversation as resolved.
Show resolved Hide resolved
});

await sleep(7000);

Expand Down Expand Up @@ -94,39 +91,58 @@ async function incompatibleUpgradeNoBuild(

const KEYS_DIR = `${localnetInitPath}/keys`;

const selectedNodesSep = `"${selectedNodes.join(' ')}"`;

try {
const buffer = execSync(
`INIT_RPC_PORT=9944 KEYS_DIR=${KEYS_DIR} NODE_COUNT=${nodeCount} SELECTED_NODES=${selectedNodesSep} LOCALNET_INIT_DIR=${localnetInitPath} BINARY_ROOT_PATH=${binaryPath} ${localnetInitPath}/scripts/start-all-nodes.sh`,
);
console.log('start node success: ' + buffer.toString());
} catch (e) {
console.error('start node error: ');
console.log(e);
}
execWithLog(`${localnetInitPath}/scripts/start-all-nodes.sh`, 'start-all-nodes', {
kylezs marked this conversation as resolved.
Show resolved Hide resolved
INIT_RPC_PORT: `${9944}`,
KEYS_DIR,
NODE_COUNT: nodeCount,
SELECTED_NODES,
LOCALNET_INIT_DIR: localnetInitPath,
BINARY_ROOT_PATH: binaryPath,
});

await sleep(20000);

const output = execSync("ps aux | grep chainflip-node | grep -v grep | awk '{print $2}'");
console.log('New node PID: ' + output.toString());

// Restart the engines
execSync(
`INIT_RUN=false LOG_SUFFIX="-upgrade" NODE_COUNT=${nodeCount} SELECTED_NODES=${selectedNodesSep} LOCALNET_INIT_DIR=${localnetInitPath} BINARY_ROOT_PATH=${binaryPath} ${localnetInitPath}/scripts/start-all-engines.sh`,
execWithLog(
`${localnetInitPath}/scripts/start-all-engines.sh`,
'start-all-engines-post-upgrade',
{
INIT_RUN: 'false',
kylezs marked this conversation as resolved.
Show resolved Hide resolved
LOG_SUFFIX: '-upgrade',
NODE_COUNT: nodeCount,
SELECTED_NODES,
LOCALNET_INIT_DIR: localnetInitPath,
BINARY_ROOT_PATH: binaryPath,
},
);

console.log('Starting new broker and lp-api.');

execSync(`KEYS_DIR=${KEYS_DIR} ${localnetInitPath}/scripts/start-broker-api.sh ${binaryPath}`);
execSync(`KEYS_DIR=${KEYS_DIR} ${localnetInitPath}/scripts/start-lp-api.sh ${binaryPath}`);
execWithLog(`${localnetInitPath}/scripts/start-broker-api.sh ${binaryPath}`, 'start-broker-api', {
KEYS_DIR,
});
kylezs marked this conversation as resolved.
Show resolved Hide resolved

execWithLog(`${localnetInitPath}/scripts/start-lp-api.sh ${binaryPath}`, 'start-lp-api', {
KEYS_DIR,
});
kylezs marked this conversation as resolved.
Show resolved Hide resolved

await sleep(20000);

const brokerPID = execSync('lsof -t -i:10997');
console.log('New broker PID: ' + brokerPID.toString());
const lpApiPID = execSync('lsof -t -i:10589');
console.log('New LP API PID: ' + lpApiPID.toString());
for (const [process, port] of [
['broker-api', 10997],
['lp-api', 10589],
]) {
try {
const pid = execSync(`lsof -t -i:${port}`);
console.log(`New ${process} PID: ${pid.toString()}`);
} catch (e) {
console.error(`Error starting ${process}: ${e}`);
kylezs marked this conversation as resolved.
Show resolved Hide resolved
throw e;
}
}

console.log('Started new broker and lp-api.');
}
Expand Down
68 changes: 68 additions & 0 deletions bouncer/shared/utils/exec_with_log.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { execSync } from 'child_process';
import path from 'path';
import os from 'os';
import fs from 'fs';

// Default directory for command logs. It is a path RELATIVE to the OS temporary
// directory: `initLogFile` passes it to `createTmpDirIfNotExists`, which joins it
// onto `os.tmpdir()`.
export const DEFAULT_LOG_ROOT = 'chainflip/logs/';

// Make sure `dir` exists underneath the OS temporary directory and return its full path.
//
// `dir` is interpreted relative to `os.tmpdir()`; missing intermediate directories are
// created as well. This is best-effort: a failure to create the directory is logged to
// stderr and the path is still returned, so callers may receive a path that does not exist.
export function createTmpDirIfNotExists(dir: string): string {
  const target = path.join(os.tmpdir(), dir);

  // Nothing to do when something already lives at the target path.
  if (fs.existsSync(target)) {
    return target;
  }

  try {
    fs.mkdirSync(target, { recursive: true });
  } catch (err) {
    console.error(`Unable to create temporary directory at ${target}: ${err}`);
  }

  return target;
}

// Work out where a log file called `fileName` should live.
//
// The containing directory (`logRoot`, relative to the OS temp dir) is created if it is
// missing; the log file itself is NOT created here - only its path is returned.
export function initLogFile(fileName: string, logRoot: string = DEFAULT_LOG_ROOT): string {
  const logDir = createTmpDirIfNotExists(logRoot);
  return path.join(logDir, fileName);
}

// Open a write stream on `fileName` and lend its file descriptor to `cb`.
//
// `cb` runs when the stream emits 'open' (i.e. not synchronously within this call) and
// receives the numeric file descriptor. As soon as `cb` returns, the stream is closed.
// The stream is returned so that callers can attach further listeners such as 'close'.
function withFileStreamTo(fileName: string, cb: (file: number) => void): fs.WriteStream {
  const stream = fs.createWriteStream(fileName);

  const onOpen = (fd: number): void => {
    cb(fd);
    stream.close();
  };
  stream.on('open', onOpen);

  return stream;
}

// Execute a command, logging stdout and stderr to a file.
// The file is `<commandAlias>.log`, initialised in the default log directory.
//
// NOTE: the command is run with `execSync`, but only once the log file's write stream
// has emitted 'open'. This function therefore returns BEFORE the command has run; use
// `callback` to observe the outcome.
//
//   command        Shell command line to execute.
//   commandAlias   Short name used for the log file and in console messages.
//   additionalEnv  Variables layered on top of `process.env` for the child process.
//   callback       Invoked exactly once, after the log stream has closed, with `true`
//                  if the command completed without throwing and `false` otherwise.
export function execWithLog(
  command: string,
  commandAlias: string,
  additionalEnv: Record<string, string> = {},
  callback?: (success: boolean) => void,
) {
  // Starts out false so that a stream which closes without the command having
  // succeeded is reported as a failure rather than as `undefined`.
  let success = false;

  withFileStreamTo(initLogFile(`${commandAlias}.log`), (file) => {
    try {
      execSync(command, {
        env: { ...process.env, ...additionalEnv },
        // stdin is inherited; stdout and stderr both go to the log file descriptor.
        stdio: [0, file, file],
      });
      console.debug(`${commandAlias} succeeded`);
      success = true;
    } catch (e) {
      // Do not invoke the callback here: the 'close' handler below reports the result.
      // Calling it in both places made failure callbacks fire twice.
      console.error(`${commandAlias} failed: ${e}`);
    }
  }).on('close', () => {
    callback?.(success);
  });
}

// Run `command` through `execWithLog` with the RUST_LOG environment variable set.
//
// `logFileName` is forwarded as the command alias, so the output ends up in
// `<logFileName>.log`. `logLevel` becomes the value of RUST_LOG and falls back to
// 'info' when omitted or undefined. `callback` is handed to `execWithLog` unchanged.
export function execWithRustLog(
  command: string,
  logFileName: string,
  logLevel: string | undefined = 'info',
  callback?: (success: boolean) => void,
) {
  const rustLogEnv = { RUST_LOG: logLevel };
  execWithLog(command, logFileName, rustLogEnv, callback);
}
Loading