Skip to content

Check for internal and external broken links #1

Check for internal and external broken links

Check for internal and external broken links #1

name: Check for internal and external broken links
on:
workflow_dispatch:
schedule:
- cron: "0 12 * * *"
jobs:
check-broken-links:
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: "lts/*"
- name: Set Vercel URL output
id: vercel
run: |
echo "url=https://docs.cloudquery.io" >> $GITHUB_OUTPUT
- name: Check for broken links
# We need to exclude some hosts that protect against bots
run: |
set -o pipefail
npx broken-link-checker --requests 100 --host-requests 10 -f -r -v -o --filter-level 3 \
--exclude "${{ steps.vercel.outputs.url }}/logo" \
--exclude "${{ steps.vercel.outputs.url }}/docs/plugins/sources/*/tables" \
--exclude "${{ steps.vercel.outputs.url }}/integrations/*" \
--exclude "${{ steps.vercel.outputs.url }}/buy/*" \
--exclude "${{ steps.vercel.outputs.url }}/contact" \
--exclude "${{ steps.vercel.outputs.url }}/contact-policies" \
--exclude "${{ steps.vercel.outputs.url }}/register_for_cloud" \
--exclude "${{ steps.vercel.outputs.url }}/apply_for_job" \
--exclude "${{ steps.vercel.outputs.url }}/careers" \
--exclude "${{ steps.vercel.outputs.url }}/register-for-marketplace" \
--exclude "${{ steps.vercel.outputs.url }}/blog" \
--exclude "${{ steps.vercel.outputs.url }}/blog/*" \
--exclude "${{ steps.vercel.outputs.url }}/integrations" \
--exclude "${{ steps.vercel.outputs.url }}/integrations/*" \
--exclude linkedin \
--exclude cloudquery.io/discord \
--exclude cloudquery.io/buy \
--exclude fonts.gstatic.com \
--exclude fonts.googleapis.com \
--exclude github.com \
--exclude support.google.com \
--exclude work-bench.com \
--exclude api.typeform.com \
--exclude api.eu.typeform.com \
--exclude mailchimp.com \
--exclude returngis.net \
--exclude plausible.io \
--exclude localhost \
--exclude twitter.com \
--exclude mongodb.com \
--exclude regex101.com \
--exclude help.shopify.com/en/manual/apps/app-types/custom-apps \
--exclude hub.cloudquery.io \
--exclude www.g2.com \
--exclude cql.ink \
--exclude egghead.io \
${{ steps.vercel.outputs.url }}/docs \
| grep -v '───OK───' | grep -v '──SKIP──' | grep -v '0 broken'
- name: Slack Notify
uses: ravsamhq/notify-slack-action@v2 # https://github.com/marketplace/actions/notify-slack-action
if: always()
with:
status: ${{ job.status }}
notify_when: 'failure'
notification_title: 'Broken links detected :bang:'
footer: '<{repo_url}|{repo}>'
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }}