chainflip-io · dandanlen · Apr 15, 2024 · Mar 18, 2024 · Apr 15, 2024 · dandanlen
@@ -6,31 +6,30 @@ on:
       upgrade-from-release:
         description: 'The release we want to upgrade *from*: "perseverance" or "berghain"'
         required: true
-        default: 'perseverance'
+        default: "perseverance"
       upgrade-to-workflow-name:
-        description: 'Name of the workflow to pull the upgrade-to artefacts from'
+        description: "Name of the workflow to pull the upgrade-to artefacts from"
         required: true
-        default: 'ci-main.yml'
+        default: "ci-main.yml"
       upgrade-to-commit:
-        description: 'Commit to run the upgrade test against. Leave blank to use latest successful workflow run.'
+        description: "Commit to run the upgrade test against. Leave blank to use the latest successful workflow run."
         required: false
 
   workflow_call:
     inputs:
       upgrade-from-release:
         type: string
         description: 'The release we want to upgrade *from*: "perseverance" or "berghain"'
-        default: 'perseverance'
+        default: "perseverance"
       upgrade-to-workflow-name:
         type: string
-        description: 'Name of the workflow to pull the upgrade-to artefacts from'
-        default: 'ci-main.yml'
+        description: "Name of the workflow to pull the upgrade-to artefacts from"
+        default: "ci-main.yml"
       upgrade-to-commit:
         type: string
-        description: 'Commit to run the upgrade test against. Leave blank to use latest successful workflow run.'
+        description: "Commit to run the upgrade test against. Leave blank to use the latest successful workflow run."
         required: false
 
-
 env:
   FORCE_COLOR: 1
 
@@ -247,6 +246,24 @@ jobs:
           path: |
             /tmp/chainflip/*/chainflip-*.*log
 
+      # Upload every *.log file under /tmp/chainflip/logs as the `chainflip-logs`
+      # artifact. `if: always()` runs the step even when an earlier step failed;
+      # `continue-on-error` keeps an upload problem from failing the job.
+      - name: Upload Chainflip Logs 💾
+        if: always()
+        continue-on-error: true
+        uses: actions/upload-artifact@v3
+        with:
+          name: chainflip-logs
+          path: |
+            /tmp/chainflip/logs/*.log
+
+      # Upload every *.snap file under /tmp/chainflip/snapshots as the
+      # `failure-snapshots` artifact. `if: failure()` limits the step to runs in
+      # which a previous step failed.
+      # NOTE(review): presumably these are the try-runtime snapshots written under
+      # the OS temp dir by the bouncer; confirm that dir is /tmp on the runner.
+      - name: Upload Snapshots 💾
+        if: failure()
+        continue-on-error: true
+        uses: actions/upload-artifact@v3
+        with:
+          name: failure-snapshots
+          path: |
+            /tmp/chainflip/snapshots/*.snap
+
       - name: Clean Up docker containers 🧹
         if: always()
         continue-on-error: true

@@ -121,6 +121,8 @@ async function main(): Promise<void> {
 }
 
 runWithTimeout(main(), 15 * 60 * 1000).catch((error) => {
-  console.error(error);
-  process.exit(-1);
+  console.error('upgrade_network exiting due to error: ', error);
+  // `process.exitCode` stays `undefined` until something assigns it, so test
+  // for "unset or zero" instead of `=== 0`. A non-zero code that was set
+  // earlier is left untouched. Setting exitCode (rather than calling
+  // process.exit) lets the process end on its own once pending work is done.
+  if (!process.exitCode) {
+    process.exitCode = -1;
+  }
 });
@@ -2,32 +2,53 @@
 // https://github.com/paritytech/try-runtime-cli
 
 import { ApiPromise } from '@polkadot/api';
-import { execSync } from 'child_process';
 import path from 'path';
-import os from 'os';
-import fs from 'fs';
 import { compileBinaries } from './utils/compile_binaries';
+import { createTmpDirIfNotExists, execWithRustLog } from './utils/exec_with_log';
 
-function tryRuntimeCommand(runtimePath: string, blockParam: string, networkUrl: string) {
-  // Create a temporary file for capturing stderr
-  const stderrFile = path.join(os.tmpdir(), `cmd-stderr-${Date.now()}`);
-  try {
-    execSync(
-      // TODO: Replace pre-and-post with all after the SDK issue paritytech/polkadot-sdk#2560 is merged.
-      `try-runtime --runtime ${runtimePath} on-runtime-upgrade --disable-spec-version-check --disable-idempotency-checks --checks pre-and-post ${blockParam} --uri ${networkUrl} 2> ${stderrFile}`,
-      { env: { ...process.env, RUST_LOG: 'runtime::executive=debug' } },
-    );
-    console.log(`try-runtime success for blockParam ${blockParam}`);
-  } catch (e) {
-    console.error(`try-runtime failed for blockParam ${blockParam}`);
-    const stderrOutput = fs.readFileSync(stderrFile, 'utf8');
-    console.error(e);
-    console.error('Command failed: Command output:', stderrOutput);
+// Runs `try-runtime create-snapshot` against `networkUrl` and writes the result
+// to `snapshot-at-<blockHash>.snap` inside the `chainflip/snapshots/` temp
+// directory. Passing 'latest' as `blockHash` omits the `--at` argument.
+// The command output goes to the `create-snapshot-<blockHash>` log.
+function createSnapshotFile(networkUrl: string, blockHash: string) {
+  const blockParam = blockHash === 'latest' ? '' : `--at ${blockHash}`;
+  const snapshotFolder = createTmpDirIfNotExists('chainflip/snapshots/');
+  const snapshotOutputPath = path.join(snapshotFolder, `snapshot-at-${blockHash}.snap`);
 
-    fs.unlinkSync(stderrFile);
+  console.log('Writing snapshot to: ', snapshotOutputPath);
 
-    process.exit(-1);
+  execWithRustLog(
+    `try-runtime create-snapshot ${blockParam} --uri ${networkUrl} ${snapshotOutputPath}`,
+    `create-snapshot-${blockHash}`,
+    'runtime::executive=debug',
+    (success) => {
+      if (!success) {
+        console.error('Failed to create snapshot.');
+      }
+      // Set whether or not the snapshot itself succeeded.
+      // NOTE(review): presumably because this function is only reached after a
+      // try-runtime failure, which must fail the process either way — confirm.
+      process.exitCode = 1;
+    },
+  );
+}
+
+// Runs `try-runtime on-runtime-upgrade` for the runtime at `runtimePath` against
+// the chain at `networkUrl`. A `blockHash` of 'latest' uses `live`; any other
+// value is passed as `live --at <blockHash>`. If the command fails, a snapshot
+// of the same block is requested via createSnapshotFile.
+// Throws an Error when `process.exitCode` is already 1 at the time of the call.
+function tryRuntimeCommand(runtimePath: string, blockHash: 'latest' | string, networkUrl: string) {
+  const blockParam = blockHash === 'latest' ? 'live' : `live --at ${blockHash}`;
+
+  // createSnapshotFile sets exitCode to 1, so this detects a failure recorded
+  // by an earlier invocation.
+  // NOTE(review): a failure of *this* invocation seems to surface only on a
+  // later call, since the result arrives through a callback — confirm.
+  if (process.exitCode === 1) {
+    console.error('TryRuntime error detected. Exiting... CHECK THE NODE LOGS FOR MORE INFO');
+    throw new Error('TryRuntime error detected.');
   }
+
+  // The trailing backslashes inside the template literal are line
+  // continuations: the command reaches the shell as one line.
+  execWithRustLog(
+    `try-runtime \
+        --runtime ${runtimePath} on-runtime-upgrade \
+        --disable-spec-version-check \
+        --disable-idempotency-checks \
+        --checks pre-and-post ${blockParam} \
+        --uri ${networkUrl}`,
+    `try-runtime-${blockHash}`,
+    'runtime::executive=debug',
+    (success) => {
+      if (!success) {
+        createSnapshotFile(networkUrl, blockHash);
+      }
+    },
+  );
 }
 
 // 4 options:
@@ -51,8 +72,7 @@ export async function tryRuntimeUpgrade(
     let blockHash = await api.rpc.chain.getBlockHash(blockNumber);
     while (!blockHash.eq(latestBlock)) {
       blockHash = await api.rpc.chain.getBlockHash(blockNumber);
-      tryRuntimeCommand(runtimePath, `live --at ${blockHash}`, networkUrl);
-
+      tryRuntimeCommand(runtimePath, `${blockHash}`, networkUrl);
       blockNumber++;
     }
     console.log(`Block ${latestBlock} has been reached, exiting.`);
@@ -63,17 +83,17 @@ export async function tryRuntimeUpgrade(
     let nextHash = await api.rpc.chain.getBlockHash();
 
     while (blocksProcessed < lastN) {
-      tryRuntimeCommand(runtimePath, `live --at ${nextHash}`, networkUrl);
+      tryRuntimeCommand(runtimePath, `${nextHash}`, networkUrl);
 
       const currentBlockHeader = await api.rpc.chain.getHeader(nextHash);
       nextHash = currentBlockHeader.parentHash;
       blocksProcessed++;
     }
   } else if (block === 'latest') {
-    tryRuntimeCommand(runtimePath, 'live', networkUrl);
+    tryRuntimeCommand(runtimePath, 'latest', networkUrl);
   } else {
     const blockHash = await api.rpc.chain.getBlockHash(block);
-    tryRuntimeCommand(runtimePath, `live --at ${blockHash}`, networkUrl);
+    tryRuntimeCommand(runtimePath, `${blockHash}`, networkUrl);
   }
 
   console.log('try-runtime upgrade successful.');

@@ -8,6 +8,7 @@ import { compareSemVer, sleep } from './utils';
 import { bumpSpecVersionAgainstNetwork } from './utils/spec_version';
 import { compileBinaries } from './utils/compile_binaries';
 import { submitRuntimeUpgradeWithRestrictions } from './submit_runtime_upgrade';
+import { execWithLog } from './utils/exec_with_log';
 
 async function readPackageTomlVersion(projectRoot: string): Promise<string> {
   const data = await fs.readFile(path.join(projectRoot, '/state-chain/runtime/Cargo.toml'), 'utf8');
@@ -49,23 +50,19 @@ async function incompatibleUpgradeNoBuild(
   runtimePath: string,
   numberOfNodes: 1 | 3,
 ) {
-  let selectedNodes;
-  if (numberOfNodes === 1) {
-    selectedNodes = ['bashful'];
-  } else if (numberOfNodes === 3) {
-    selectedNodes = ['bashful', 'doc', 'dopey'];
-  } else {
-    throw new Error('Invalid number of nodes');
-  }
+  const SELECTED_NODES = numberOfNodes === 1 ? 'bashful' : 'bashful doc dopey';
 
   console.log('Starting all the engines');
 
   const nodeCount = numberOfNodes + '-node';
-  execSync(
-    `INIT_RUN=false LOG_SUFFIX="-upgrade" NODE_COUNT=${nodeCount} SELECTED_NODES="${selectedNodes.join(
-      ' ',
-    )}" LOCALNET_INIT_DIR=${localnetInitPath} BINARY_ROOT_PATH=${binaryPath} ${localnetInitPath}/scripts/start-all-engines.sh`,
-  );
+  execWithLog(`${localnetInitPath}/scripts/start-all-engines.sh`, 'start-all-engines-pre-upgrade', {
+    INIT_RUN: 'false',
+    LOG_SUFFIX: '-upgrade',
+    NODE_COUNT: nodeCount,
+    SELECTED_NODES,
+    LOCALNET_INIT_DIR: localnetInitPath,
+    BINARY_ROOT_PATH: binaryPath,
+  });
 
   await sleep(7000);
 
@@ -94,39 +91,58 @@ async function incompatibleUpgradeNoBuild(
 
   const KEYS_DIR = `${localnetInitPath}/keys`;
 
-  const selectedNodesSep = `"${selectedNodes.join(' ')}"`;
-
-  try {
-    const buffer = execSync(
-      `INIT_RPC_PORT=9944 KEYS_DIR=${KEYS_DIR} NODE_COUNT=${nodeCount} SELECTED_NODES=${selectedNodesSep} LOCALNET_INIT_DIR=${localnetInitPath} BINARY_ROOT_PATH=${binaryPath} ${localnetInitPath}/scripts/start-all-nodes.sh`,
-    );
-    console.log('start node success: ' + buffer.toString());
-  } catch (e) {
-    console.error('start node error: ');
-    console.log(e);
-  }
+  execWithLog(`${localnetInitPath}/scripts/start-all-nodes.sh`, 'start-all-nodes', {
+    INIT_RPC_PORT: `${9944}`,
+    KEYS_DIR,
+    NODE_COUNT: nodeCount,
+    SELECTED_NODES,
+    LOCALNET_INIT_DIR: localnetInitPath,
+    BINARY_ROOT_PATH: binaryPath,
+  });
 
   await sleep(20000);
 
   const output = execSync("ps aux | grep chainflip-node | grep -v grep | awk '{print $2}'");
   console.log('New node PID: ' + output.toString());
 
   // Restart the engines
-  execSync(
-    `INIT_RUN=false LOG_SUFFIX="-upgrade" NODE_COUNT=${nodeCount} SELECTED_NODES=${selectedNodesSep} LOCALNET_INIT_DIR=${localnetInitPath} BINARY_ROOT_PATH=${binaryPath} ${localnetInitPath}/scripts/start-all-engines.sh`,
+  execWithLog(
+    `${localnetInitPath}/scripts/start-all-engines.sh`,
+    'start-all-engines-post-upgrade',
+    {
+      INIT_RUN: 'false',
+      LOG_SUFFIX: '-upgrade',
+      NODE_COUNT: nodeCount,
+      SELECTED_NODES,
+      LOCALNET_INIT_DIR: localnetInitPath,
+      BINARY_ROOT_PATH: binaryPath,
+    },
   );
 
   console.log('Starting new broker and lp-api.');
 
-  execSync(`KEYS_DIR=${KEYS_DIR} ${localnetInitPath}/scripts/start-broker-api.sh ${binaryPath}`);
-  execSync(`KEYS_DIR=${KEYS_DIR} ${localnetInitPath}/scripts/start-lp-api.sh ${binaryPath}`);
+  execWithLog(`${localnetInitPath}/scripts/start-broker-api.sh ${binaryPath}`, 'start-broker-api', {
+    KEYS_DIR,
+  });
+
+  execWithLog(`${localnetInitPath}/scripts/start-lp-api.sh ${binaryPath}`, 'start-lp-api', {
+    KEYS_DIR,
+  });
 
   await sleep(20000);
 
-  const brokerPID = execSync('lsof -t -i:10997');
-  console.log('New broker PID: ' + brokerPID.toString());
-  const lpApiPID = execSync('lsof -t -i:10589');
-  console.log('New LP API PID: ' + lpApiPID.toString());
+  for (const [process, port] of [
+    ['broker-api', 10997],
+    ['lp-api', 10589],
+  ]) {
+    try {
+      const pid = execSync(`lsof -t -i:${port}`);
+      console.log(`New ${process} PID: ${pid.toString()}`);
+    } catch (e) {
+      console.error(`Error starting ${process}: ${e}`);
+      throw e;
+    }
+  }
 
   console.log('Started new broker and lp-api.');
 }

@@ -0,0 +1,68 @@
+import { execSync } from 'child_process';
+import path from 'path';
+import os from 'os';
+import fs from 'fs';
+
+export const DEFAULT_LOG_ROOT = 'chainflip/logs/';
+
+// Resolve `dir` inside the OS temp directory and create it (with parents) when
+// it is missing. A creation failure is logged, not thrown; the resolved path is
+// returned in every case.
+export function createTmpDirIfNotExists(dir: string): string {
+  const target = path.join(os.tmpdir(), dir);
+
+  const ensureExists = (): void => {
+    if (fs.existsSync(target)) {
+      return;
+    }
+    fs.mkdirSync(target, { recursive: true });
+  };
+
+  try {
+    ensureExists();
+  } catch (err) {
+    console.error(`Unable to create temporary directory at ${target}: ${err}`);
+  }
+
+  return target;
+}
+
+// Build the full path of `fileName` under `logRoot` (relative to the temp
+// directory), making sure that directory is there first.
+export function initLogFile(fileName: string, logRoot: string = DEFAULT_LOG_ROOT): string {
+  const logDir = createTmpDirIfNotExists(logRoot);
+  return path.join(logDir, fileName);
+}
+
+// Open a write stream on `fileName`; once it is open, hand its file descriptor
+// to `cb` and close the stream as soon as `cb` returns. The stream is returned
+// so callers can attach further listeners.
+function withFileStreamTo(fileName: string, cb: (file: number) => void): fs.WriteStream {
+  const out = fs.createWriteStream(fileName);
+  const onOpen = (fd: number): void => {
+    cb(fd);
+    out.close();
+  };
+  out.on('open', onOpen);
+  return out;
+}
+
+// Execute a command, logging stdout and stderr to a file.
+// The file will be initialised in the default log directory as `<commandAlias>.log`.
+//
+// The command is started from the log stream's 'open' handler, so this function
+// returns before the command has run; use `callback` to learn the outcome.
+//
+// command:       shell command line handed to execSync.
+// commandAlias:  names the log file and prefixes the console messages.
+// additionalEnv: merged over process.env for the child process.
+// callback:      invoked exactly once, after the log stream has closed, with
+//                `true` if the command exited successfully and `false` otherwise.
+export function execWithLog(
+  command: string,
+  commandAlias: string,
+  additionalEnv: Record<string, string> = {},
+  callback?: (success: boolean) => void,
+) {
+  let success = false;
+  withFileStreamTo(initLogFile(`${commandAlias}.log`), (file) => {
+    try {
+      execSync(`${command}`, {
+        env: { ...process.env, ...additionalEnv },
+        stdio: [0, file, file],
+      });
+      console.debug(`${commandAlias} succeeded`);
+      success = true;
+    } catch (e) {
+      // Only record the failure here. The callback is fired from 'close' below,
+      // so a failed command no longer triggers it twice.
+      console.error(`${commandAlias} failed: ${e}`);
+    }
+  }).on('close', () => {
+    callback?.(success);
+  });
+}
+
+// Same as execWithLog, with RUST_LOG set to `logLevel` ('info' when omitted) in
+// the child's environment. `logFileName` is forwarded as the command alias.
+export function execWithRustLog(
+  command: string,
+  logFileName: string,
+  logLevel: string | undefined = 'info',
+  callback?: (success: boolean) => void,
+) {
+  const rustEnv = { RUST_LOG: logLevel };
+  execWithLog(command, logFileName, rustEnv, callback);
+}