From fef94c63c0e67109964b818a8d27abfe07d47e9e Mon Sep 17 00:00:00 2001
From: Philip Langer
Date: Sun, 1 Oct 2023 20:52:26 +0200
Subject: [PATCH 1/4] WIP: Average and best of 10 values

With this change we run the performance tests 10 times and record the
average and the best of 10 in the performance report. This will
hopefully lead to more stable results.

Contributed on behalf of STMicroelectronics.
---
 .github/workflows/performance.yml | 147 +++++++++++++++++++++++++++---
 scripts/fetch-metrics.ts          |   3 +-
 scripts/performance-report.ts     |   8 ++
 3 files changed, 145 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
index 1706398cd..7d14c723b 100644
--- a/.github/workflows/performance.yml
+++ b/.github/workflows/performance.yml
@@ -2,35 +2,33 @@ name: Performance
 
 on:
   workflow_dispatch:
+  pull_request: # FIXME JUST FOR TESTING
+    branches:
+      - main
   schedule:
     - cron: "0 5 * * *" # Runs every day at 5am: https://docs.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule
 
 jobs:
-  build:
+  prepare:
     name: Performance benchmark on ubuntu-latest with Node.js 16.x
     runs-on: ubuntu-latest
-    timeout-minutes: 60
-
+    timeout-minutes: 20
     steps:
       - name: Checkout
         uses: actions/checkout@v3
-
       - name: Use Node.js "16.x"
         uses: actions/setup-node@v3
         with:
           node-version: "16.x"
           registry-url: "https://registry.npmjs.org"
-
       - name: Use Python 3.x
         uses: actions/setup-python@v4
         with:
           python-version: "3.x"
-
       - name: Build
         shell: bash
         run: yarn
-
       - name: Checkout Theia
         uses: actions/checkout@v3
         with:
@@ -57,16 +55,143 @@ jobs:
         env:
           NODE_OPTIONS: --max_old_space_size=4096
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # https://github.com/microsoft/vscode-ripgrep/issues/9
-      - name: Run Theia
+      - name: Cache Build Result
+        uses: actions/cache@v3
+        id: build-result
+        with:
+          path: ./*
+          key: ${{ github.run_number }}
+
+  run-test:
+    name: Performance benchmark on ubuntu-latest with Node.js 16.x
+    runs-on: ubuntu-latest
+    needs: [prepare]
+    strategy:
+      matrix:
+        run: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+    timeout-minutes: 40
+    steps:
+      - name: Restore Build Result (#${{ matrix.run }})
+        uses: actions/cache/restore@v3
+        id: build-result
+        with:
+          path: ./*
+          key: ${{ github.run_number }}
+          fail-on-cache-miss: true
+      - name: Use Node.js "16.x"
+        uses: actions/setup-node@v3
+        with:
+          node-version: "16.x"
+          registry-url: "https://registry.npmjs.org"
+      - name: Use Python 3.x
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.x"
+      - name: Run Theia (#${{ matrix.run }})
         shell: bash
         working-directory: ./theia
         run: yarn browser start:debug &
-
-      - name: Run Performance Measurement
+      - name: Run Performance Measurement (#${{ matrix.run }})
         uses: GabrielBB/xvfb-action@v1
+        env:
+          RUN_NO: ${{ matrix.run }}
         with:
           run: yarn performance
+      # - name: Cache Performance Measurement (#${{ matrix.run }})
+      #   uses: actions/cache@v3
+      #   id: performance-measurement-${{ matrix.run }}
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-${{ matrix.run }}
 
+  generate-report:
+    name: Performance benchmark on ubuntu-latest with Node.js 16.x
+    runs-on: ubuntu-latest
+    needs: [run-test]
+    steps:
+      - name: Restore Build Result
+        uses: actions/cache/restore@v3
+        id: build-result
+        with:
+          path: ./*
+          key: ${{ github.run_number }}
+          fail-on-cache-miss: true
+      # ===============================================================================================
+      # START RESTORING PERFORMANCE MEASUREMENTS FROM CACHE
+      # we have to restore the performance measurement of each run individually
+      # so the following restore steps need to be aligned with the matrix.run variable in the job above
+      # - name: Restore Performance Measurement 0
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-0
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-0
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 1
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-1
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-1
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 2
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-2
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-2
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 3
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-3
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-3
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 4
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-4
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-4
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 5
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-5
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-5
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 6
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-6
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-6
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 7
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-7
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-7
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 8
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-8
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-8
+      #     fail-on-cache-miss: true
+      # - name: Restore Performance Measurement 9
+      #   uses: actions/cache/restore@v3
+      #   id: performance-measurement-9
+      #   with:
+      #     path: ./performance-metrics
+      #     key: ${{ github.run_number }}-9
+      #     fail-on-cache-miss: true
+      # END RESTORING PERFORMANCE MEASUREMENTS FROM CACHE
+      # ===============================================================================================
       - name: Get History
         uses: actions/checkout@v2
         if: always() && github.ref == 'refs/heads/main'
         continue-on-error: true
         with:
           ref: gh-pages
           path: gh-pages
-
       - name: Prepare Report
         if: always() && github.ref == 'refs/heads/main'
         shell: bash
         run: yarn performance-report
-
       - name: Publish Report
         if: always() && github.ref == 'refs/heads/main'
         uses: peaceiris/actions-gh-pages@v2
diff --git a/scripts/fetch-metrics.ts b/scripts/fetch-metrics.ts
index cf4b02eed..74edfba6b 100644
--- a/scripts/fetch-metrics.ts
+++ b/scripts/fetch-metrics.ts
@@ -42,7 +42,8 @@ export async function fetchPerformanceMetrics({
     const dateString = `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}`;
     const timeString = `${now.getHours()}-${now.getMinutes()}-${now.getSeconds()}`;
     const timestamp = `${dateString}T${timeString}`;
-    const fileName = outputFileNamePrefix + timestamp + outputFileNamePostfix;
+    const runNo = process.env.RUN_NO ? `_${process.env.RUN_NO}` : '';
+    const fileName = outputFileNamePrefix + timestamp + runNo + outputFileNamePostfix;
     const targetFile = `${outputFilePath}/${fileName}`;
 
     fs.ensureDirSync(outputFilePath);
diff --git a/scripts/performance-report.ts b/scripts/performance-report.ts
index c494bd421..902f35cb5 100644
--- a/scripts/performance-report.ts
+++ b/scripts/performance-report.ts
@@ -102,6 +102,14 @@ export async function generatePerformanceReport(path: string) {
         'process_cpu_seconds_total',
         'playwright_total_time'
     ]);
+
+    // TODO Post process values:
+    // if label ends with _X
+    //    take current as label
+    //    collect all values until X of _X is smaller than previous or doesn't exist
+    //    replace collected with one that has a computed average and best of 10 value of collected
+    // else: set current value as average and best of 10
+
     const charts: string[] = [];
     for (const [valueLabel, valueHistory] of values) {
         const data = valueHistory.history.map(entry => ({ x: entry.entryLabel, y: entry.value }));
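With this change, each matrix job passes its run number to the metrics fetcher via RUN_NO, and
that number ends up in the metrics file name. A minimal sketch of the resulting names, assuming
an empty outputFileNamePrefix and a '.txt' outputFileNamePostfix purely for illustration (the
real values come from the script's configuration):

    // sketch: mirrors the naming logic added to fetch-metrics.ts above;
    // the empty prefix and '.txt' postfix are assumptions, not the script's actual defaults
    function metricsFileName(now: Date, runNo?: string): string {
        const dateString = `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}`;
        const timeString = `${now.getHours()}-${now.getMinutes()}-${now.getSeconds()}`;
        const timestamp = `${dateString}T${timeString}`;
        const runSuffix = runNo ? `_${runNo}` : '';
        return '' + timestamp + runSuffix + '.txt';
    }

    metricsFileName(new Date(2023, 9, 1, 19, 39, 3), '2'); // '2023-10-1T19-39-3_2.txt'
    metricsFileName(new Date(2023, 9, 1, 19, 39, 3));      // '2023-10-1T19-39-3.txt' (no RUN_NO set)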
From 084531511c114e1548b4de4a5c4140813e2760e6 Mon Sep 17 00:00:00 2001
From: Philip Langer
Date: Mon, 2 Oct 2023 17:22:47 +0200
Subject: [PATCH 2/4] Try adding measurements incrementally to gh-pages

---
 .github/workflows/performance.yml | 99 +++++--------------------------
 1 file changed, 16 insertions(+), 83 deletions(-)

diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
index 7d14c723b..8bf7c0adf 100644
--- a/.github/workflows/performance.yml
+++ b/.github/workflows/performance.yml
@@ -86,6 +86,11 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: "3.x"
+      - name: Get History
+        uses: actions/checkout@v4
+        with:
+          ref: gh-pages
+          path: gh-pages
       - name: Run Theia (#${{ matrix.run }})
         shell: bash
         working-directory: ./theia
@@ -96,12 +101,11 @@ jobs:
           RUN_NO: ${{ matrix.run }}
         with:
           run: yarn performance
-      # - name: Cache Performance Measurement (#${{ matrix.run }})
-      #   uses: actions/cache@v3
-      #   id: performance-measurement-${{ matrix.run }}
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-${{ matrix.run }}
+      - name: Upload performance measurement (#${{ matrix.run }})
+        uses: actions/upload-artifact@v3
+        with:
+          name: performance-measurement-${{ github.run_number }}
+          path: performance-metrics/*
 
   generate-report:
     name: Performance benchmark on ubuntu-latest with Node.js 16.x
@@ -115,89 +119,18 @@ jobs:
           key: ${{ github.run_number }}
           fail-on-cache-miss: true
-      # ===============================================================================================
-      # START RESTORING PERFORMANCE MEASUREMENTS FROM CACHE
-      # we have to restore the performance measurement of each run individually
-      # so the following restore steps need to be aligned with the matrix.run variable in the job above
-      # - name: Restore Performance Measurement 0
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-0
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-0
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 1
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-1
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-1
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 2
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-2
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-2
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 3
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-3
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-3
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 4
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-4
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-4
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 5
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-5
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-5
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 6
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-6
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-6
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 7
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-7
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-7
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 8
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-8
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-8
-      #     fail-on-cache-miss: true
-      # - name: Restore Performance Measurement 9
-      #   uses: actions/cache/restore@v3
-      #   id: performance-measurement-9
-      #   with:
-      #     path: ./performance-metrics
-      #     key: ${{ github.run_number }}-9
-      #     fail-on-cache-miss: true
-      # END RESTORING PERFORMANCE MEASUREMENTS FROM CACHE
-      # ===============================================================================================
       - name: Get History
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
         if: always() && github.ref == 'refs/heads/main'
         continue-on-error: true
         with:
           ref: gh-pages
           path: gh-pages
+      - name: Download Performance Measurements
+        uses: actions/download-artifact@v3
+        with:
+          name: performance-measurement-${{ github.run_number }}
+          path: performance-metrics
       - name: Prepare Report
         if: always() && github.ref == 'refs/heads/main'
         shell: bash
         run: yarn performance-report
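Since every matrix job uploads to the same artifact name, actions/upload-artifact@v3 merges the
ten files into one artifact, and the report job fetches them all with a single download step.
Each job stamps its file at the time it happens to run, though, so the downloaded
performance-metrics folder looks roughly like this (hypothetical names, with the empty prefix
assumed earlier):

    2023-10-1T19-39-3_0.txt
    2023-10-1T19-40-12_1.txt
    2023-10-1T19-41-2_2.txt
    ...
    2023-10-1T19-58-47_9.txt

The timestamps differ from run to run, which is what the file name harmonization in the next
patch compensates for.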
From 50ced9e1abb6eced6a8c64e3210fd494a1fafa40 Mon Sep 17 00:00:00 2001
From: Philip Langer
Date: Tue, 3 Oct 2023 21:34:16 +0200
Subject: [PATCH 3/4] Combine runs of a measurement into one entry

Compute the average and the best of 10 from multiple runs of a single
measurement. Mark those entries with `[]` in the report.

Contributed on behalf of STMicroelectronics.
---
 scripts/performance-report.ts | 249 +++++++++++++++++++++-------------
 1 file changed, 155 insertions(+), 94 deletions(-)

diff --git a/scripts/performance-report.ts b/scripts/performance-report.ts
index 902f35cb5..f999f25fd 100644
--- a/scripts/performance-report.ts
+++ b/scripts/performance-report.ts
@@ -19,6 +19,8 @@ import * as fs from 'fs-extra';
 import * as readline from 'readline';
 import yargs from 'yargs';
 
+const matchUntilUnderscoreOrDot = /^([^\.\_])+/;
+
 (async () => {
     const options: PerformanceReportParams = yargs(process.argv)
         .option('ghPagesPath', {
@@ -66,8 +68,9 @@ export async function preparePerformanceReport({
     console.log('Copying history');
     fs.emptyDirSync(publishPath);
     fs.copySync(ghPagesPath, publishPath, { filter: (src, dest) => !src.includes('.git') });
-    // copy latest performance metrics into performance publish path
+    // harmonize file names and copy latest performance metrics into performance publish path
     console.log('Copying performance metrics');
+    harmonizeFileNames(performanceMetricsPath);
     fs.ensureDirSync(`${publishPath}/${performancePublishPath}`);
     fs.copySync(performanceMetricsPath, `${publishPath}/${performancePublishPath}`);
     // generate performance report
@@ -75,9 +78,35 @@ export async function preparePerformanceReport({
     console.log('Generating performance report');
     generatePerformanceReport(`${publishPath}/${performancePublishPath}`);
 }
 
+/**
+ * We expect all files in this path to be measurements of the same job but from different runs.
+ * Thus they may have slightly different timestamps. In order to have a consistent dataset,
+ * we harmonize the timestamps in their file names and distinguish the files only by their run number.
+ * @param path Path to the performance metrics files.
+ */
+function harmonizeFileNames(path: string) {
+    const files = fs.readdirSync(path).sort(sortByDateAndRunNumber);
+    if (files.length <= 1) {
+        return;
+    }
+    let referenceFileName = files.find(f => f.match(matchUntilUnderscoreOrDot))?.match(matchUntilUnderscoreOrDot);
+    if (!referenceFileName || referenceFileName.length < 1) {
+        return;
+    }
+    for (const file of files) {
+        const fileNameWithoutExtension = file.substring(0, file.lastIndexOf('.'));
+        const runNumber = getRunNumber(fileNameWithoutExtension);
+        if (runNumber >= 0) {
+            fs.renameSync(`${path}/${file}`, `${path}/${referenceFileName[0]}_${runNumber}.txt`);
+        }
+    }
+}
+
 interface ValueHistoryEntry {
     entryLabel: string;
     value: number;
+    best?: number;
+    combinesRuns?: string[];
 }
 interface ValueHistory {
     valueLabel: string;
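Continuing the hypothetical listing from above, harmonizeFileNames picks the first file in
date/run order as the reference and renames the others to its timestamp, so the files end up
differing only in their run suffix:

    2023-10-1T19-39-3_0.txt
    2023-10-1T19-39-3_1.txt
    2023-10-1T19-39-3_2.txt
    ...
    2023-10-1T19-39-3_9.txt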
@@ -103,16 +132,12 @@ export async function generatePerformanceReport(path: string) {
         'process_cpu_seconds_total',
         'playwright_total_time'
     ]);
 
-    // TODO Post process values:
-    // if label ends with _X
-    //    take current as label
-    //    collect all values until X of _X is smaller than previous or doesn't exist
-    //    replace collected with one that has a computed average and best of 10 value of collected
-    // else: set current value as average and best of 10
+    const processedValues = processValues(values);
 
     const charts: string[] = [];
-    for (const [valueLabel, valueHistory] of values) {
+    for (const [valueLabel, valueHistory] of processedValues) {
         const data = valueHistory.history.map(entry => ({ x: entry.entryLabel, y: entry.value }));
+        const best = valueHistory.history.map(entry => ({ x: entry.entryLabel, y: entry.best ?? entry.value }));
         const valueId = valueLabel.replace('/', '_');
         charts.push(`
@@ -123,21 +148,16 @@ export async function generatePerformanceReport(path: string) {
     new Chart(ctx${valueId}, {
         type: 'line',
         data: {
-          datasets: [{
-            label: '${valueLabel}',
-            data: ${JSON.stringify(data)}
-          }]
-        },
-        options: {
-          plugins: {
-            annotation: {
-              annotations: {
-                averageLine,
-                stdDerivationUpper,
-                stdDerivationLower
-              }
-            }
-          }
-        }
+          datasets: [
+            {
+              label: '${valueLabel} (average of 10 runs)',
+              data: ${JSON.stringify(data)}
+            },
+            {
+              label: '${valueLabel} (best of 10 runs)',
+              data: ${JSON.stringify(best)}
+            },
+          ]
+        }
     });
 
@@ -155,74 +175,6 @@ export async function generatePerformanceReport(path: string) {
-
@@ -240,9 +192,7 @@ export async function generatePerformanceReport(path: string) {
 
 export async function readValuesFromHistory(path: string, values: string[]): Promise<Map<string, ValueHistory>> {
     const valueHistoryMap = initializeValueHistoryMap(values);
-    const files = fs.readdirSync(path)
-        .filter(file => !file.endsWith('index.html'))
-        .sort((a, b) => toDate(a).getTime() - toDate(b).getTime());
+    const files = fs.readdirSync(path).filter(hasTxtExtension).sort(sortByDateAndRunNumber);
     for (const file of files) {
         const entryLabel = file.substring(0, file.indexOf('.'));
         const entries = await readEntries(path + '/' + file, values);
@@ -253,6 +203,27 @@ export async function readValuesFromHistory(path: string, values: string[]): Promise<Map<string, ValueHistory>> {
     return valueHistoryMap;
 }
 
+export function hasTxtExtension(file: string): boolean {
+    return file.endsWith('.txt');
+}
+
+export function sortByDateAndRunNumber(a: string, b: string): number {
+    const runNumberA = extractRunNumber(a);
+    const runNumberB = extractRunNumber(b);
+    const dateStringA = extractDateString(a, runNumberA);
+    const dateStringB = extractDateString(b, runNumberB);
+    const dateComparison = toDate(dateStringA).getTime() - toDate(dateStringB).getTime();
+    return dateComparison !== 0 ? dateComparison : runNumberA - runNumberB;
+}
+
+export function extractRunNumber(fileName: string): number {
+    return getRunNumber(fileName.replace('.txt', ''));
+}
+
+export function extractDateString(fileName: string, runNumber: number): string {
+    return runNumber < 0 ? fileName : fileName.substring(0, fileName.indexOf(`_${runNumber}`));
+}
+
 export function initializeValueHistoryMap(values: string[]) {
     const valueHistoryMap = new Map<string, ValueHistory>();
     for (const value of values) {
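A quick sanity check of the intended ordering, again with hypothetical file names, and assuming
the existing toDate helper parses the timestamp portion as it did for the old file names. The
run number is compared numerically, so _10 sorts after _2, which a plain lexicographic sort
would get wrong:

    const files = [
        '2023-10-2T5-0-0_0.txt',
        '2023-10-1T19-39-3_10.txt',
        '2023-10-1T19-39-3_2.txt'
    ];
    files.sort(sortByDateAndRunNumber);
    // → ['2023-10-1T19-39-3_2.txt', '2023-10-1T19-39-3_10.txt', '2023-10-2T5-0-0_0.txt']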
@@ -301,3 +272,93 @@ export async function readEntries(path: string, values: string[]): Promise<{ val
     }
     return entries;
 }
+
+/**
+ * Post-processes the values read from the metrics files.
+ *
+ * The input is a map of value labels (e.g. `theia_measurements/frontend`) to the history of values for this label.
+ * Each history entry itself has a label, which denotes the date and optionally the run, e.g. `2023-10-1T19-39-3_2`,
+ * meaning the measurement was recorded on Oct 1, 2023 at 19:39:03 in the second run (`_2`).
+ *
+ * All runs of a single measurement are combined into one entry. Measurements that don't belong to a run
+ * remain single entries.
+ * The algorithm for combining multiple runs into a single entry relies on the assumption that the list of
+ * `ValueHistoryEntry` instances is sorted by date, so that multiple runs with the same date occur in a sequence.
+ * Otherwise this function may produce bogus results.
+ *
+ * This function walks through all value labels and each history entry for that value label and applies the following:
+ * * If the entry represents the measurement of a single run (i.e. ends with `_X`) and has the same entry label as
+ *   the previous entry, track it to be combined into one entry.
+ * * If the entry does not represent a single run (i.e. does not end with `_X`) or has a different entry label,
+ *   conclude the previous collection and start a new one.
+ *   Concluding a collection means computing the average and the best of ten value and putting them into a single
+ *   `ValueHistoryEntry` that combines all runs of the collection. The first element of the collection determines
+ *   the label of the entire collection.
+ *
+ * @param values raw values as read from performance metrics files (key is the label of the value, value is the
+ *   history of values), whereas the `ValueHistory.history` of each entry must be sorted ascending by date and run.
+ * @returns post processed values with the averages and best of ten values of multiple runs.
+ */
+export function processValues(values: Map<string, ValueHistory>): Map<string, ValueHistory> {
+    const processedValues = new Map<string, ValueHistory>();
+    for (const [valueLabel, valueHistory] of values) {
+        const currentValueHistory = { valueLabel, history: new Array<ValueHistoryEntry>() };
+
+        let currentCollection: ValueHistoryEntry[] | undefined = undefined;
+        let previousEntryLabel: string | undefined = undefined;
+        for (const entry of valueHistory.history) {
+
+            const entryLabelMatchArray = entry.entryLabel.match(matchUntilUnderscoreOrDot);
+            const currentEntryLabel = entryLabelMatchArray ? entryLabelMatchArray[0] : undefined;
+            if (currentCollection && currentCollection.length > 0 && currentEntryLabel !== previousEntryLabel) {
+                // a new entry label was encountered, so combine the current collection into a single entry and reset
+                currentValueHistory.history.push(toCombinedValueHistoryEntry(currentCollection));
+                currentCollection = undefined;
+                previousEntryLabel = undefined;
+            }
+
+            if (currentCollection && currentEntryLabel === previousEntryLabel) {
+                // add to the current collection
+                currentCollection.push(entry);
+            } else {
+                // start a new collection
+                currentCollection = [entry];
+            }
+
+            previousEntryLabel = currentEntryLabel;
+        }
+
+        // combine the last collection if there still is one
+        if (currentCollection && currentCollection.length > 0) {
+            currentValueHistory.history.push(toCombinedValueHistoryEntry(currentCollection));
+        }
+
+        processedValues.set(valueLabel, currentValueHistory);
+    }
+    return processedValues;
+}
+
+function getRunNumber(label: string): number {
+    const match = label.match(/_([0-9]+)$/);
+    return match ? Number.parseInt(match[1]) : -1;
+}
+
+function toCombinedValueHistoryEntry(entries: ValueHistoryEntry[]): ValueHistoryEntry {
+    const values = entries.map(entry => entry.value);
+    const combinesRuns = entries.map(entry => entry.entryLabel);
+    const matchArray = combinesRuns[0].match(matchUntilUnderscoreOrDot);
+    const entryLabel = matchArray ? matchArray[0] + (entries.length > 1 ? '[]' : '') : combinesRuns[0];
+    const value = averageValue(values);
+    const best = bestValue(values);
+    return {
+        entryLabel,
+        value,
+        best,
+        combinesRuns
+    };
+}
+
+function averageValue(values: number[]): number {
+    return values.reduce((a, b) => a + b, 0) / values.length;
+}
+
+function bestValue(values: number[]): number {
+    return Math.min(...values);
+}
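To make the combination concrete, a sketch of what processValues yields for a single value
label, with hypothetical entries (note the plain average at this point in the series; the next
patch changes how the average is computed):

    const input: ValueHistory = {
        valueLabel: 'theia_measurements/frontend',
        history: [
            { entryLabel: '2023-9-30T5-0-0', value: 2.4 },     // single run, no run suffix
            { entryLabel: '2023-10-1T19-39-3_0', value: 2.0 }, // three runs of one measurement
            { entryLabel: '2023-10-1T19-39-3_1', value: 2.6 },
            { entryLabel: '2023-10-1T19-39-3_2', value: 2.2 }
        ]
    };
    const output = processValues(new Map([['theia_measurements/frontend', input]]));
    // the resulting history holds two entries:
    // { entryLabel: '2023-9-30T5-0-0', value: 2.4, best: 2.4,
    //   combinesRuns: ['2023-9-30T5-0-0'] }
    // { entryLabel: '2023-10-1T19-39-3[]', value: 2.2666..., best: 2.0,
    //   combinesRuns: ['2023-10-1T19-39-3_0', '2023-10-1T19-39-3_1', '2023-10-1T19-39-3_2'] }
    // where 2.2666... = (2.0 + 2.6 + 2.2) / 3 and the trailing [] marks a combined entry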
From 00d27f67524f619870a818eccaec410cfff7fc14 Mon Sep 17 00:00:00 2001
From: Philip Langer
Date: Wed, 4 Oct 2023 21:15:55 +0200
Subject: [PATCH 4/4] Use again an average that removes the worst case

---
 scripts/performance-report.ts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/performance-report.ts b/scripts/performance-report.ts
index f999f25fd..c638a8e01 100644
--- a/scripts/performance-report.ts
+++ b/scripts/performance-report.ts
@@ -294,7 +294,7 @@ export async function readEntries(path: string, values: string[]): Promise<{ val
  *
  * @param values raw values as read from performance metrics files (key is the label of the value, value is the
  *   history of values), whereas the `ValueHistory.history` of each entry must be sorted ascending by date and run.
- * @returns post processed values with the averages and best of ten values of multiple runs.
+ * @returns post processed values with the average values and best of ten values of multiple runs.
  */
 export function processValues(values: Map<string, ValueHistory>): Map<string, ValueHistory> {
@@ -356,6 +356,11 @@ function toCombinedValueHistoryEntry(entries: ValueHistoryEntry[]): ValueHistory
 }
 
 function averageValue(values: number[]): number {
+    if (values.length > 1) {
+        // remove the worst value as an outlier; sort numerically, since the default sort compares as strings
+        values = values.sort((a, b) => a - b);
+        values.pop();
+    }
     return values.reduce((a, b) => a + b, 0) / values.length;
 }
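The numeric comparator matters here: Array.prototype.sort without arguments compares elements
as strings, so [85, 90, 100] would sort to [100, 85, 90] and pop() would then discard 90 while
keeping the outlier 100. With the comparator in place, a worked example (hypothetical values):

    const runs = [90, 88, 100, 85, 87];
    bestValue(runs);    // 85
    averageValue(runs); // sorts numerically to [85, 87, 88, 90, 100], drops the worst (100),
                        // then averages the rest: (85 + 87 + 88 + 90) / 4 = 87.5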