diff --git a/.github/actions/notify-slack-jobs-result/README.md b/.github/actions/notify-slack-jobs-result/README.md new file mode 100644 index 00000000000..298930c0d95 --- /dev/null +++ b/.github/actions/notify-slack-jobs-result/README.md @@ -0,0 +1,37 @@ +# Notify Slack Jobs Result + +Sends a Slack message to a specified channel detailing the results of one to many GHA job results using a regex. The job results will be grouped by the `github_job_name_regex` and displayed underneath the `message_title`, with the regex matching group displayed as an individual result. This is primarily designed for when you have test groups running in a matrix, and would like condensed reporting on their status by group. It's often accompanied by posting a Slack message before to start a thread, then attaching all the results to that thread like we do in the reporting section of the [live-testnet-test.yml workflow](../../workflows/live-testnet-tests.yml). Check out the example below, where we post an initial summary message, then use this action to thread together specific results: + +```yaml +message_title: Optimism Goerli +github_job_name_regex: ^Optimism Goerli (?.*?) Tests$ # Note that the regex MUST have a capturing group named "cap" +``` + +![example](image.png) + +## Inputs + +```yaml +inputs: + github_token: + description: "The GitHub token to use for authentication (usually ${{ github.token }})" + required: true + github_repository: + description: "The GitHub owner/repository to use for authentication (usually ${{ github.repository }}))" + required: true + workflow_run_id: + description: "The workflow run ID to get the results from (usually ${{ github.run_id }})" + required: true + github_job_name_regex: + description: "The regex to use to match 1..many job name(s) to collect results from. Should include a capture group named 'cap' for the part of the job name you want to display in the Slack message (e.g. ^Client Compatability Test (?.*?)$)" + required: true + message_title: + description: "The title of the Slack message" + required: true + slack_channel_id: + description: "The Slack channel ID to post the message to" + required: true + slack_thread_ts: + description: "The Slack thread timestamp to post the message to, handy for keeping multiple related results in a single thread" + required: false +``` diff --git a/.github/actions/notify-slack-jobs-result/action.yml b/.github/actions/notify-slack-jobs-result/action.yml new file mode 100644 index 00000000000..63840cfa393 --- /dev/null +++ b/.github/actions/notify-slack-jobs-result/action.yml @@ -0,0 +1,110 @@ +name: Notify Slack Jobs Result +description: Will send a notification in Slack for the result of a GitHub action run, typically for test results +inputs: + github_token: + description: "The GitHub token to use for authentication (usually github.token)" + required: true + github_repository: + description: "The GitHub owner/repository to use for authentication (usually github.repository))" + required: true + workflow_run_id: + description: "The workflow run ID to get the results from (usually github.run_id)" + required: true + github_job_name_regex: + description: "The regex to use to match 1..many job name(s) to collect results from. Should include a capture group named 'cap' for the part of the job name you want to display in the Slack message (e.g. ^Client Compatability Test (?.*?)$)" + required: true + message_title: + description: "The title of the Slack message" + required: true + slack_channel_id: + description: "The Slack channel ID to post the message to" + required: true + slack_bot_token: + description: "The Slack bot token to use for authentication which needs permission and an installed app in the channel" + required: true + slack_thread_ts: + description: "The Slack thread timestamp to post the message to, handy for keeping multiple related results in a single thread" + required: false + +runs: + using: composite + steps: + - name: Get Results + shell: bash + id: test-results + run: | + # I feel like there's some clever, fully jq way to do this, but I ain't got the motivation to figure it out + echo "Querying test results at https://api.github.com/repos/${{inputs.github_repository}}/actions/runs/${{ inputs.workflow_run_id }}/jobs" + + PARSED_RESULTS=$(curl \ + -H "Authorization: Bearer ${{ inputs.github_token }}" \ + 'https://api.github.com/repos/${{inputs.github_repository}}/actions/runs/${{ inputs.workflow_run_id }}/jobs' \ + | jq -r --arg pattern "${{ inputs.github_job_name_regex }}" '.jobs[] + | select(.name | test($pattern)) as $job + | $job.steps[] + | select(.name == "Run Tests") + | { conclusion: (if .conclusion == "success" then ":white_check_mark:" else ":x:" end), cap: ("*" + ($job.name | capture($pattern).cap) + "*"), html_url: $job.html_url }') + + echo "Parsed Results:" + echo $PARSED_RESULTS + + ALL_SUCCESS=true + echo "Checking for failures" + echo "$PARSED_RESULTS" | jq -s | jq -r '.[] | select(.conclusion != ":white_check_mark:")' + for row in $(echo "$PARSED_RESULTS" | jq -s | jq -r '.[] | select(.conclusion != ":white_check_mark:")'); do + ALL_SUCCESS=false + break + done + echo "Success: $ALL_SUCCESS" + + echo all_success=$ALL_SUCCESS >> $GITHUB_OUTPUT + + FORMATTED_RESULTS=$(echo $PARSED_RESULTS | jq -s '[.[] + | { + conclusion: .conclusion, + cap: .cap, + html_url: .html_url + } + ] + | map("{\"type\": \"section\", \"text\": {\"type\": \"mrkdwn\", \"text\": \"<\(.html_url)|\(.cap)>: \(.conclusion)\"}}") + | join(",")') + + echo "Formatted Results:" + echo $FORMATTED_RESULTS + + # Cleans out backslashes and quotes from jq + CLEAN_RESULTS=$(echo "$FORMATTED_RESULTS" | sed 's/\\\"/"/g' | sed 's/^"//;s/"$//') + + echo "Clean Results" + echo $CLEAN_RESULTS + + echo results=$CLEAN_RESULTS >> $GITHUB_OUTPUT + - name: Post Results + uses: slackapi/slack-github-action@e28cf165c92ffef168d23c5c9000cffc8a25e117 # v1.24.0 + env: + SLACK_BOT_TOKEN: ${{ inputs.slack_bot_token }} + with: + channel-id: ${{ inputs.slack_channel_id }} + payload: | + { + "thread_ts": "${{ inputs.slack_thread_ts }}", + "attachments": [ + { + "color": "${{ steps.test-results.outputs.all_success == 'true' && '#2E7D32' || '#C62828' }}", + "blocks": [ + { + "type": "header", + "text": { + "type": "plain_text", + "text": "${{ inputs.message_title }} ${{ steps.test-results.outputs.all_success == 'true' && ':white_check_mark:' || ':x:'}}", + "emoji": true + } + }, + { + "type": "divider" + }, + ${{ steps.test-results.outputs.results }} + ] + } + ] + } diff --git a/.github/actions/notify-slack-jobs-result/image.png b/.github/actions/notify-slack-jobs-result/image.png new file mode 100644 index 00000000000..3bd398101fb Binary files /dev/null and b/.github/actions/notify-slack-jobs-result/image.png differ diff --git a/.github/workflows/client-compatibility-tests.yml b/.github/workflows/client-compatibility-tests.yml new file mode 100644 index 00000000000..8688e51e1df --- /dev/null +++ b/.github/workflows/client-compatibility-tests.yml @@ -0,0 +1,269 @@ +name: Client Compatibility Tests +on: + schedule: + - cron: "30 5 * * *" # Run every night at midnight + 30min EST + push: + tags: + - "*" + workflow_dispatch: + +jobs: + build-chainlink: + environment: integration + permissions: + id-token: write + contents: read + name: Build Chainlink Image + runs-on: ubuntu-latest + steps: + - name: Collect Metrics + id: collect-gha-metrics + uses: smartcontractkit/push-gha-metrics-action@d1618b772a97fd87e6505de97b872ee0b1f1729a # v2.0.2 + with: + basic-auth: ${{ secrets.GRAFANA_CLOUD_BASIC_AUTH }} + hostname: ${{ secrets.GRAFANA_CLOUD_HOST }} + this-job-name: Build Chainlink Image + continue-on-error: true + - name: Checkout the repo + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + ref: ${{ github.event.pull_request.head.sha || github.event.merge_group.head_sha }} + - name: Build Chainlink Image + uses: ./.github/actions/build-chainlink-image + with: + tag_suffix: "" + dockerfile: core/chainlink.Dockerfile + git_commit_sha: ${{ github.sha }} + GRAFANA_CLOUD_BASIC_AUTH: ${{ secrets.GRAFANA_CLOUD_BASIC_AUTH }} + GRAFANA_CLOUD_HOST: ${{ secrets.GRAFANA_CLOUD_HOST }} + AWS_REGION: ${{ secrets.QA_AWS_REGION }} + AWS_ROLE_TO_ASSUME: ${{ secrets.QA_AWS_ROLE_TO_ASSUME }} + + client-compatibility-matrix: + environment: integration + permissions: + checks: write + pull-requests: write + id-token: write + contents: read + needs: [build-chainlink] + env: + SELECTED_NETWORKS: SIMULATED,SIMULATED_1,SIMULATED_2 + CHAINLINK_COMMIT_SHA: ${{ github.sha }} + CHAINLINK_ENV_USER: ${{ github.actor }} + TEST_LOG_LEVEL: debug + strategy: + fail-fast: false + matrix: + product: + - name: ocr-geth + os: ubuntu-latest + run: -run TestOCRBasic + file: ocr + client: geth + pyroscope_env: ci-smoke-ocr-evm-simulated + # Uncomment, when https://smartcontract-it.atlassian.net/browse/TT-753 is DONE + # - name: ocr-nethermind + # run: -run TestOCRBasic + # file: ocr + # client: nethermind + # pyroscope_env: ci-smoke-ocr-evm-simulated + - name: ocr-besu + run: -run TestOCRBasic + file: ocr + client: besu + pyroscope_env: ci-smoke-ocr-evm-simulated + - name: ocr-erigon + run: -run TestOCRBasic + file: ocr + client: erigon + pyroscope_env: ci-smoke-ocr-evm-simulated + - name: ocr2-geth + run: -run TestOCRv2Basic + file: ocr2 + client: geth + pyroscope_env: ci-smoke-ocr2-evm-simulated + # Uncomment, when https://smartcontract-it.atlassian.net/browse/TT-753 is DONE + # - name: ocr2-nethermind + # run: -run TestOCRv2Basic + # file: ocr2 + # client: nethermind + # pyroscope_env: ci-smoke-ocr2-evm-simulated + - name: ocr2-besu + run: -run TestOCRv2Basic + file: ocr2 + client: besu + pyroscope_env: ci-smoke-ocr2-evm-simulated + - name: ocr2-erigon + run: -run TestOCRv2Basic + file: ocr2 + client: erigon + pyroscope_env: ci-smoke-ocr2-evm-simulated + runs-on: ubuntu-latest + name: Client Compatibility Test ${{ matrix.product.name }} + steps: + - name: Checkout the repo + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + ref: ${{ github.event.pull_request.head.sha || github.event.merge_group.head_sha }} + - name: Build Go Test Command + id: build-go-test-command + run: | + # if the matrix.product.run is set, use it for a different command + if [ "${{ matrix.product.run }}" != "" ]; then + echo "run_command=${{ matrix.product.run }} ./smoke/${{ matrix.product.file }}_test.go" >> "$GITHUB_OUTPUT" + else + echo "run_command=./smoke/${{ matrix.product.name }}_test.go" >> "$GITHUB_OUTPUT" + fi + - name: Run Tests + uses: smartcontractkit/chainlink-github-actions/chainlink-testing-framework/run-tests@e865e376b8c2d594028c8d645dd6c47169b72974 # v2.2.16 + env: + PYROSCOPE_SERVER: ${{ matrix.product.pyroscope_env == '' && '' || !startsWith(github.ref, 'refs/tags/') && '' || secrets.QA_PYROSCOPE_INSTANCE }} # Avoid sending blank envs https://github.com/orgs/community/discussions/25725 + PYROSCOPE_ENVIRONMENT: ${{ matrix.product.pyroscope_env }} + PYROSCOPE_KEY: ${{ secrets.QA_PYROSCOPE_KEY }} + ETH2_EL_CLIENT: ${{matrix.product.client}} + LOKI_TENANT_ID: ${{ vars.LOKI_TENANT_ID }} + LOKI_URL: ${{ secrets.LOKI_URL }} + LOKI_BASIC_AUTH: ${{ secrets.LOKI_BASIC_AUTH }} + LOGSTREAM_LOG_TARGETS: ${{ vars.LOGSTREAM_LOG_TARGETS }} + GRAFANA_URL: ${{ vars.GRAFANA_URL }} + GRAFANA_DATASOURCE: ${{ vars.GRAFANA_DATASOURCE }} + RUN_ID: ${{ github.run_id }} + with: + test_command_to_run: cd ./integration-tests && go test -timeout 30m -count=1 -json ${{ steps.build-go-test-command.outputs.run_command }} 2>&1 | tee /tmp/gotest.log | gotestfmt + test_download_vendor_packages_command: cd ./integration-tests && go mod download + cl_repo: ${{ env.CHAINLINK_IMAGE }} + cl_image_tag: ${{ github.sha }}${{ matrix.product.tag_suffix }} + aws_registries: ${{ secrets.QA_AWS_ACCOUNT_NUMBER }} + artifacts_name: ${{ matrix.product.name }}-test-logs + artifacts_location: ./integration-tests/smoke/logs/ + publish_check_name: ${{ matrix.product.name }} + token: ${{ secrets.GITHUB_TOKEN }} + go_mod_path: ./integration-tests/go.mod + cache_key_id: core-e2e-${{ env.MOD_CACHE_VERSION }} + cache_restore_only: "true" + QA_AWS_REGION: ${{ secrets.QA_AWS_REGION }} + QA_AWS_ROLE_TO_ASSUME: ${{ secrets.QA_AWS_ROLE_TO_ASSUME }} + QA_KUBECONFIG: "" + - name: Collect Metrics + if: always() + id: collect-gha-metrics + uses: smartcontractkit/push-gha-metrics-action@d1618b772a97fd87e6505de97b872ee0b1f1729a # v2.0.2 + with: + basic-auth: ${{ secrets.GRAFANA_CLOUD_BASIC_AUTH }} + hostname: ${{ secrets.GRAFANA_CLOUD_HOST }} + this-job-name: ETH Smoke Tests ${{ matrix.product.name }}${{ matrix.product.tag_suffix }} + test-results-file: '{"testType":"go","filePath":"/tmp/gotest.log"}' + continue-on-error: true + - name: Print failed test summary + if: always() + run: | + directory="./integration-tests/smoke/.test_summary" + files=("$directory"/*) + if [ -d "$directory" ]; then + echo "Test summary folder found" + if [ ${#files[@]} -gt 0 ]; then + first_file="${files[0]}" + echo "Name of the first test summary file: $(basename "$first_file")" + echo "### Failed Test Execution Logs Dashboard (over VPN):" >> $GITHUB_STEP_SUMMARY + cat "$first_file" | jq -r '.loki[] | "* [\(.test_name)](\(.value))"' >> $GITHUB_STEP_SUMMARY + if [ ${#files[@]} -gt 1 ]; then + echo "Found more than one test summary file. This is incorrect, there should be only one file" + fi + else + echo "Test summary directory is empty. This should not happen" + fi + else + echo "No test summary folder found. If no test failed or log collection wasn't explicitly requested this is correct. Exiting" + fi + + start-slack-thread: + name: Start Slack Thread + if: ${{ always() && needs.*.result != 'skipped' && needs.*.result != 'cancelled' }} + environment: integration + outputs: + thread_ts: ${{ steps.slack.outputs.thread_ts }} + permissions: + checks: write + pull-requests: write + id-token: write + contents: read + runs-on: ubuntu-latest + needs: [client-compatibility-matrix] + steps: + - name: Debug Result + run: echo ${{ join(needs.*.result, ',') }} + - name: Main Slack Notification + uses: slackapi/slack-github-action@e28cf165c92ffef168d23c5c9000cffc8a25e117 # v1.24.0 + id: slack + with: + channel-id: ${{ secrets.QA_SLACK_CHANNEL }} + payload: | + { + "attachments": [ + { + "color": "${{ contains(join(needs.*.result, ','), 'failure') && '#C62828' || '#2E7D32' }}", + "blocks": [ + { + "type": "header", + "text": { + "type": "plain_text", + "text": "Live Smoke Test Results ${{ contains(join(needs.*.result, ','), 'failure') && ':x:' || ':white_check_mark:'}}", + "emoji": true + } + }, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "${{ contains(join(needs.*.result, ','), 'failure') && 'Some tests failed, notifying <@U01Q4N37KFG> and <@U060CGGPY8H>' || 'All Good!' }}" + } + }, + { + "type": "divider" + }, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "<${{ github.server_url }}/${{ github.repository }}/releases/tag/${{ github.ref_name }}|${{ github.ref_name }}> | <${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}|${{ github.sha }}> | <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|Run>" + } + } + ] + } + ] + } + env: + SLACK_BOT_TOKEN: ${{ secrets.QA_SLACK_API_KEY }} + + post-test-results-to-slack: + name: Post Test Results for ${{matrix.product}} + if: ${{ always() && needs.*.result != 'skipped' && needs.*.result != 'cancelled' }} + environment: integration + permissions: + checks: write + pull-requests: write + id-token: write + contents: read + runs-on: ubuntu-latest + needs: start-slack-thread + strategy: + fail-fast: false + matrix: + product: [ocr, ocr2] + steps: + - name: Checkout the repo + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + ref: ${{ github.event.pull_request.head.sha || github.event.merge_group.head_sha }} + - name: Post Test Results to Slack + uses: ./.github/actions/notify-slack-jobs-result + with: + github_token: ${{ github.token }} + github_repository: ${{ github.repository }} + workflow_run_id: ${{ github.run_id }} + github_job_name_regex: ^Client Compatability Test ${{ matrix.product }}-(?.*?)$ + message_title: ${{ matrix.product }} + slack_channel_id: ${{ secrets.QA_SLACK_CHANNEL }} + slack_bot_token: ${{ secrets.QA_SLACK_API_KEY }} + slack_thread_ts: ${{ needs.start-slack-thread.outputs.thread_ts }} diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index e8243c8cdfa..0fa9370b60d 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -327,75 +327,23 @@ jobs: os: ubuntu-latest run: -run TestOCRJobReplacement file: ocr - pyroscope_env: ci-smoke-ocr-evm-simulated - - name: ocr-geth - nodes: 1 - os: ubuntu-latest - run: -run TestOCRBasic - file: ocr - client: geth - pyroscope_env: ci-smoke-ocr-evm-simulated - # Uncomment, when https://smartcontract-it.atlassian.net/browse/TT-753 is DONE - # - name: ocr-nethermind - # nodes: 1 - # os: ubuntu20.04-8cores-32GB - # run: -run TestOCRBasic - # file: ocr - # client: nethermind - # pyroscope_env: ci-smoke-ocr-evm-simulated - - name: ocr-besu - nodes: 1 - os: ubuntu20.04-8cores-32GB - run: -run TestOCRBasic - file: ocr - client: besu - pyroscope_env: ci-smoke-ocr-evm-simulated - - name: ocr-erigon - nodes: 1 - os: ubuntu20.04-8cores-32GB - run: -run TestOCRBasic - file: ocr - client: erigon - pyroscope_env: ci-smoke-ocr-evm-simulated + pyroscope_env: ci-smoke-ocr-evm-simulated - name: ocr2 nodes: 1 - os: ubuntu20.04-8cores-32GB + os: ubuntu-latest run: -run TestOCRv2JobReplacement file: ocr2 pyroscope_env: ci-smoke-ocr2-evm-simulated - - name: ocr2-geth - nodes: 1 - os: ubuntu20.04-8cores-32GB - run: -run TestOCRv2Basic - file: ocr2 - client: geth - pyroscope_env: ci-smoke-ocr2-evm-simulated - # Uncomment, when https://smartcontract-it.atlassian.net/browse/TT-753 is DONE - # - name: ocr2-nethermind - # nodes: 1 - # os: ubuntu20.04-8cores-32GB - # run: -run TestOCRv2Basic - # file: ocr2 - # client: nethermind - # pyroscope_env: ci-smoke-ocr2-evm-simulated - - name: ocr2-besu - nodes: 1 - os: ubuntu20.04-8cores-32GB - run: -run TestOCRv2Basic - file: ocr2 - client: besu - pyroscope_env: ci-smoke-ocr2-evm-simulated - - name: ocr2-erigon + - name: ocr2 nodes: 1 - os: ubuntu20.04-8cores-32GB + os: ubuntu-latest run: -run TestOCRv2Basic file: ocr2 - client: erigon - pyroscope_env: ci-smoke-ocr2-evm-simulated + pyroscope_env: ci-smoke-ocr2-evm-simulated - name: ocr2 nodes: 1 - os: ubuntu20.04-8cores-32GB - pyroscope_env: ci-smoke-ocr2-evm-simulated + os: ubuntu-latest + pyroscope_env: ci-smoke-ocr2-plugins-evm-simulated tag_suffix: "-plugins" - name: runlog nodes: 1 @@ -497,7 +445,6 @@ jobs: PYROSCOPE_SERVER: ${{ matrix.product.pyroscope_env == '' && '' || !startsWith(github.ref, 'refs/tags/') && '' || secrets.QA_PYROSCOPE_INSTANCE }} # Avoid sending blank envs https://github.com/orgs/community/discussions/25725 PYROSCOPE_ENVIRONMENT: ${{ matrix.product.pyroscope_env }} PYROSCOPE_KEY: ${{ secrets.QA_PYROSCOPE_KEY }} - ETH2_EL_CLIENT: ${{matrix.product.client}} LOKI_TENANT_ID: ${{ vars.LOKI_TENANT_ID }} LOKI_URL: ${{ secrets.LOKI_URL }} LOKI_BASIC_AUTH: ${{ secrets.LOKI_BASIC_AUTH }} diff --git a/.github/workflows/live-testnet-tests.yml b/.github/workflows/live-testnet-tests.yml index 7a12ab6d6a0..2b5ecb2d5da 100644 --- a/.github/workflows/live-testnet-tests.yml +++ b/.github/workflows/live-testnet-tests.yml @@ -10,7 +10,7 @@ name: Live Testnet Tests on: schedule: - - cron: "0 5 * * *" # Run every Sunday at midnight EST + - cron: "0 5 * * *" # Run every night at midnight EST push: tags: - "*" @@ -144,7 +144,7 @@ jobs: id-token: write contents: read runs-on: ubuntu-latest - needs: [sepolia-smoke-tests, bsc-testnet-tests, optimism-sepolia-smoke-tests, arbitrum-sepolia-smoke-tests, base-goerli-smoke-tests, base-sepolia-smoke-tests, polygon-mumbai-smoke-tests, avalanche-fuji-smoke-tests, fantom-testnet-smoke-tests, celo-alfajores-smoke-tests, linea-goerli-smoke-tests] + needs: [sepolia-smoke-tests, optimism-sepolia-smoke-tests, arbitrum-sepolia-smoke-tests, base-goerli-smoke-tests, base-sepolia-smoke-tests, polygon-mumbai-smoke-tests, avalanche-fuji-smoke-tests, fantom-testnet-smoke-tests, celo-alfajores-smoke-tests, linea-goerli-smoke-tests] steps: - name: Debug Result run: echo ${{ join(needs.*.result, ',') }} @@ -205,86 +205,23 @@ jobs: strategy: fail-fast: false matrix: - network: [Sepolia, Optimism Sepolia, Arbitrum Sepolia, Base Goerli, Base Sepolia, Polygon Mumbai, Avalanche Fuji, Fantom Testnet, Celo Alfajores, Linea Goerli, BSC Testnet] + network: [Sepolia, Optimism Sepolia, Arbitrum Sepolia, Base Goerli, Base Sepolia, Polygon Mumbai, Avalanche Fuji, Fantom Testnet, Celo Alfajores, Linea Goerli] steps: - - name: Get Results - id: test-results - run: | - # I feel like there's some clever, fully jq way to do this, but I ain't got the motivation to figure it out - echo "Querying test results" - - PARSED_RESULTS=$(curl \ - -H "Authorization: Bearer ${{ github.token }}" \ - 'https://api.github.com/repos/${{github.repository}}/actions/runs/${{ github.run_id }}/jobs' \ - | jq -r --arg pattern "^${{ matrix.network }} (?.*?) Tests$" '.jobs[] - | select(.name | test($pattern)) as $job - | $job.steps[] - | select(.name == "Run Tests") - | { conclusion: (if .conclusion == "success" then ":white_check_mark:" else ":x:" end), product: ("*" + ($job.name | capture($pattern).product) + "*") }') - - echo "Parsed Results:" - echo $PARSED_RESULTS - - ALL_SUCCESS=true - echo "Checking for failures" - echo "$PARSED_RESULTS" | jq -s | jq -r '.[] | select(.conclusion != ":white_check_mark:")' - for row in $(echo "$PARSED_RESULTS" | jq -s | jq -r '.[] | select(.conclusion != ":white_check_mark:")'); do - ALL_SUCCESS=false - break - done - echo "Success: $ALL_SUCCESS" - - echo all_success=$ALL_SUCCESS >> $GITHUB_OUTPUT - - FORMATTED_RESULTS=$(echo $PARSED_RESULTS | jq -s '[.[] - | { - conclusion: .conclusion, - product: .product - } - ] - | map("{\"type\": \"section\", \"text\": {\"type\": \"mrkdwn\", \"text\": \"\(.product): \(.conclusion)\"}}") - | join(",")') - - echo "Formatted Results:" - echo $FORMATTED_RESULTS - - # Cleans out backslashes and quotes from jq - CLEAN_RESULTS=$(echo "$FORMATTED_RESULTS" | sed 's/\\\"/"/g' | sed 's/^"//;s/"$//') - - echo "Clean Results" - echo $CLEAN_RESULTS - - echo results=$CLEAN_RESULTS >> $GITHUB_OUTPUT - - - name: Test Details - uses: slackapi/slack-github-action@e28cf165c92ffef168d23c5c9000cffc8a25e117 # v1.24.0 + - name: Checkout the repo + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: - channel-id: ${{ secrets.QA_SLACK_CHANNEL }} - payload: | - { - "thread_ts": "${{ needs.start-slack-thread.outputs.thread_ts }}", - "attachments": [ - { - "color": "${{ steps.test-results.outputs.all_success == 'true' && '#2E7D32' || '#C62828' }}", - "blocks": [ - { - "type": "header", - "text": { - "type": "plain_text", - "text": "${{ matrix.network }} ${{ steps.test-results.outputs.all_success == 'true' && ':white_check_mark:' || ':x:'}}", - "emoji": true - } - }, - { - "type": "divider" - }, - ${{ steps.test-results.outputs.results }} - ] - } - ] - } - env: - SLACK_BOT_TOKEN: ${{ secrets.QA_SLACK_API_KEY }} + ref: ${{ github.event.pull_request.head.sha || github.event.merge_group.head_sha }} + - name: Post Test Results + uses: ./.github/actions/notify-slack-jobs-result + with: + github_token: ${{ github.token }} + github_repository: ${{ github.repository }} + workflow_run_id: ${{ github.run_id }} + github_job_name_regex: ^${{ matrix.network }} (?.*?) Tests$ + message_title: ${{ matrix.network }} + slack_channel_id: ${{ secrets.QA_SLACK_CHANNEL }} + slack_bot_token: ${{ secrets.QA_SLACK_API_KEY }} + slack_thread_ts: ${{ needs.start-slack-thread.outputs.thread_ts }} # End Reporting Jobs @@ -339,6 +276,8 @@ jobs: QA_KUBECONFIG: ${{ secrets.QA_KUBECONFIG }} bsc-testnet-tests: + # TODO: BSC RPCs are all in a bad state right now, so we're skipping these tests until they're fixed + if: false environment: integration permissions: checks: write diff --git a/core/services/relay/evm/mercury/wsrpc/cache/cache_set.go b/core/services/relay/evm/mercury/wsrpc/cache/cache_set.go index 98388442d9a..01d47743950 100644 --- a/core/services/relay/evm/mercury/wsrpc/cache/cache_set.go +++ b/core/services/relay/evm/mercury/wsrpc/cache/cache_set.go @@ -46,7 +46,7 @@ func newCacheSet(lggr logger.Logger, cfg Config) *cacheSet { func (cs *cacheSet) Start(context.Context) error { return cs.StartOnce("CacheSet", func() error { - cs.lggr.Debugw("CacheSet starting", "config", cs.cfg) + cs.lggr.Debugw("CacheSet starting", "config", cs.cfg, "cachingEnabled", cs.cfg.LatestReportTTL > 0) return nil }) } @@ -65,6 +65,10 @@ func (cs *cacheSet) Close() error { } func (cs *cacheSet) Get(ctx context.Context, client Client) (f Fetcher, err error) { + if cs.cfg.LatestReportTTL == 0 { + // caching disabled + return client, nil + } ok := cs.IfStarted(func() { f, err = cs.get(ctx, client) }) diff --git a/core/services/relay/evm/mercury/wsrpc/cache/cache_set_test.go b/core/services/relay/evm/mercury/wsrpc/cache/cache_set_test.go index 7d754b8326e..59be76ed265 100644 --- a/core/services/relay/evm/mercury/wsrpc/cache/cache_set_test.go +++ b/core/services/relay/evm/mercury/wsrpc/cache/cache_set_test.go @@ -13,7 +13,8 @@ import ( func Test_CacheSet(t *testing.T) { lggr := logger.TestLogger(t) - cs := newCacheSet(lggr, Config{}) + cs := newCacheSet(lggr, Config{LatestReportTTL: 1}) + disabledCs := newCacheSet(lggr, Config{LatestReportTTL: 0}) ctx := testutils.Context(t) servicetest.Run(t, cs) @@ -22,6 +23,16 @@ func Test_CacheSet(t *testing.T) { var err error var f Fetcher + t.Run("with caching disabled, returns the passed client", func(t *testing.T) { + assert.Len(t, disabledCs.caches, 0) + + f, err = disabledCs.Get(ctx, c) + require.NoError(t, err) + + assert.Same(t, c, f) + assert.Len(t, disabledCs.caches, 0) + }) + t.Run("with virgin cacheset, makes new entry and returns it", func(t *testing.T) { assert.Len(t, cs.caches, 0) diff --git a/core/services/relay/evm/mercury/wsrpc/client.go b/core/services/relay/evm/mercury/wsrpc/client.go index 5b6bfa1a9b0..c9533717757 100644 --- a/core/services/relay/evm/mercury/wsrpc/client.go +++ b/core/services/relay/evm/mercury/wsrpc/client.go @@ -24,9 +24,9 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/utils" ) -// MaxConsecutiveTransmitFailures controls how many consecutive requests are +// MaxConsecutiveRequestFailures controls how many consecutive requests are // allowed to time out before we reset the connection -const MaxConsecutiveTransmitFailures = 5 +const MaxConsecutiveRequestFailures = 10 var ( timeoutCount = promauto.NewCounterVec(prometheus.CounterOpts{ @@ -55,7 +55,7 @@ var ( ) connectionResetCount = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "mercury_connection_reset_count", - Help: fmt.Sprintf("Running count of times connection to mercury server has been reset (connection reset happens automatically after %d consecutive transmit failures)", MaxConsecutiveTransmitFailures), + Help: fmt.Sprintf("Running count of times connection to mercury server has been reset (connection reset happens automatically after %d consecutive request failures)", MaxConsecutiveRequestFailures), }, []string{"serverURL"}, ) @@ -256,13 +256,26 @@ func (w *client) Transmit(ctx context.Context, req *pb.TransmitRequest) (resp *p return nil, errors.Wrap(err, "Transmit call failed") } resp, err = w.rawClient.Transmit(ctx, req) + w.handleTimeout(err) + if err != nil { + w.logger.Warnw("Transmit call failed due to networking error", "err", err, "resp", resp) + incRequestStatusMetric(statusFailed) + } else { + w.logger.Tracew("Transmit call succeeded", "resp", resp) + incRequestStatusMetric(statusSuccess) + setRequestLatencyMetric(float64(time.Since(start).Milliseconds())) + } + return +} + +func (w *client) handleTimeout(err error) { if errors.Is(err, context.DeadlineExceeded) { w.timeoutCountMetric.Inc() cnt := w.consecutiveTimeoutCnt.Add(1) - if cnt == MaxConsecutiveTransmitFailures { + if cnt == MaxConsecutiveRequestFailures { w.logger.Errorf("Timed out on %d consecutive transmits, resetting transport", cnt) - // NOTE: If we get 5+ request timeouts in a row, close and re-open - // the websocket connection. + // NOTE: If we get at least MaxConsecutiveRequestFailures request + // timeouts in a row, close and re-open the websocket connection. // // This *shouldn't* be necessary in theory (ideally, wsrpc would // handle it for us) but it acts as a "belts and braces" approach @@ -271,11 +284,11 @@ func (w *client) Transmit(ctx context.Context, req *pb.TransmitRequest) (resp *p select { case w.chResetTransport <- struct{}{}: default: - // This can happen if we had 5 consecutive timeouts, already - // sent a reset signal, then the connection started working - // again (resetting the count) then we got 5 additional - // failures before the runloop was able to close the bad - // connection. + // This can happen if we had MaxConsecutiveRequestFailures + // consecutive timeouts, already sent a reset signal, then the + // connection started working again (resetting the count) then + // we got MaxConsecutiveRequestFailures additional failures + // before the runloop was able to close the bad connection. // // It should be safe to just ignore in this case. // @@ -286,15 +299,6 @@ func (w *client) Transmit(ctx context.Context, req *pb.TransmitRequest) (resp *p } else { w.consecutiveTimeoutCnt.Store(0) } - if err != nil { - w.logger.Warnw("Transmit call failed due to networking error", "err", err, "resp", resp) - incRequestStatusMetric(statusFailed) - } else { - w.logger.Tracew("Transmit call succeeded", "resp", resp) - incRequestStatusMetric(statusSuccess) - setRequestLatencyMetric(float64(time.Since(start).Milliseconds())) - } - return } func (w *client) LatestReport(ctx context.Context, req *pb.LatestReportRequest) (resp *pb.LatestReportResponse, err error) { @@ -306,6 +310,7 @@ func (w *client) LatestReport(ctx context.Context, req *pb.LatestReportRequest) var cached bool if w.cache == nil { resp, err = w.rawClient.LatestReport(ctx, req) + w.handleTimeout(err) } else { cached = true resp, err = w.cache.LatestReport(ctx, req) diff --git a/core/services/relay/evm/mercury/wsrpc/client_test.go b/core/services/relay/evm/mercury/wsrpc/client_test.go index f265d54879c..10712461ae1 100644 --- a/core/services/relay/evm/mercury/wsrpc/client_test.go +++ b/core/services/relay/evm/mercury/wsrpc/client_test.go @@ -54,7 +54,7 @@ func Test_Client_Transmit(t *testing.T) { noopCacheSet := newNoopCacheSet() - t.Run("sends on reset channel after MaxConsecutiveTransmitFailures timed out transmits", func(t *testing.T) { + t.Run("sends on reset channel after MaxConsecutiveRequestFailures timed out transmits", func(t *testing.T) { calls := 0 transmitErr := context.DeadlineExceeded wsrpcClient := &mocks.MockWSRPCClient{ @@ -70,11 +70,11 @@ func Test_Client_Transmit(t *testing.T) { c.conn = conn c.rawClient = wsrpcClient require.NoError(t, c.StartOnce("Mock WSRPC Client", func() error { return nil })) - for i := 1; i < MaxConsecutiveTransmitFailures; i++ { + for i := 1; i < MaxConsecutiveRequestFailures; i++ { _, err := c.Transmit(ctx, req) require.EqualError(t, err, "context deadline exceeded") } - assert.Equal(t, 4, calls) + assert.Equal(t, MaxConsecutiveRequestFailures-1, calls) select { case <-c.chResetTransport: t.Fatal("unexpected send on chResetTransport") @@ -82,7 +82,7 @@ func Test_Client_Transmit(t *testing.T) { } _, err := c.Transmit(ctx, req) require.EqualError(t, err, "context deadline exceeded") - assert.Equal(t, 5, calls) + assert.Equal(t, MaxConsecutiveRequestFailures, calls) select { case <-c.chResetTransport: default: @@ -94,14 +94,14 @@ func Test_Client_Transmit(t *testing.T) { // working transmit to reset counter _, err = c.Transmit(ctx, req) require.NoError(t, err) - assert.Equal(t, 6, calls) + assert.Equal(t, MaxConsecutiveRequestFailures+1, calls) assert.Equal(t, 0, int(c.consecutiveTimeoutCnt.Load())) }) t.Run("doesn't block in case channel is full", func(t *testing.T) { transmitErr = context.DeadlineExceeded c.chResetTransport = nil // simulate full channel - for i := 0; i < MaxConsecutiveTransmitFailures; i++ { + for i := 0; i < MaxConsecutiveRequestFailures; i++ { _, err := c.Transmit(ctx, req) require.EqualError(t, err, "context deadline exceeded") } @@ -162,10 +162,7 @@ func Test_Client_LatestReport(t *testing.T) { // simulate start without dialling require.NoError(t, c.StartOnce("Mock WSRPC Client", func() error { return nil })) - var err error servicetest.Run(t, cacheSet) - c.cache, err = cacheSet.Get(ctx, c) - require.NoError(t, err) for i := 0; i < 5; i++ { r, err := c.LatestReport(ctx, req)